1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41 typedef int __v16si __attribute__ ((__vector_size__ (64)));
42
43 /* The Intel API is flexible enough that we must allow aliasing with other
44 vector types, and their scalar components. */
45 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
46 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
47 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
48
49 typedef unsigned char __mmask8;
50 typedef unsigned short __mmask16;
51
52 /* Rounding mode macros. */
53 #define _MM_FROUND_TO_NEAREST_INT 0x00
54 #define _MM_FROUND_TO_NEG_INF 0x01
55 #define _MM_FROUND_TO_POS_INF 0x02
56 #define _MM_FROUND_TO_ZERO 0x03
57 #define _MM_FROUND_CUR_DIRECTION 0x04
58 #define _MM_FROUND_NO_EXC 0x05
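/* Illustrative usage sketch (not part of the original header): these macros
   are meant for the last argument of the explicit-rounding intrinsics defined
   further down, e.g. _mm_add_round_sd.  _MM_FROUND_CUR_DIRECTION means "use
   whatever rounding mode MXCSR currently selects"; the other values embed a
   specific static rounding mode.  _mm_set_pd below comes from emmintrin.h:

     __m128d a = _mm_set_pd (0.0, 1.5);
     __m128d b = _mm_set_pd (0.0, 2.5);
     __m128d sum = _mm_add_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
*/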
59
60 extern __inline __m512i
61 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
62 _mm512_set_epi64 (long long __A, long long __B, long long __C,
63 long long __D, long long __E, long long __F,
64 long long __G, long long __H)
65 {
66 return __extension__ (__m512i) (__v8di)
67 { __H, __G, __F, __E, __D, __C, __B, __A };
68 }
69
70 /* Create the vector [A B C D E F G H I J K L M N O P]. */
71 extern __inline __m512i
72 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
73 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
74 int __E, int __F, int __G, int __H,
75 int __I, int __J, int __K, int __L,
76 int __M, int __N, int __O, int __P)
77 {
78 return __extension__ (__m512i)(__v16si)
79 { __P, __O, __N, __M, __L, __K, __J, __I,
80 __H, __G, __F, __E, __D, __C, __B, __A };
81 }
82
83 extern __inline __m512d
84 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
85 _mm512_set_pd (double __A, double __B, double __C, double __D,
86 double __E, double __F, double __G, double __H)
87 {
88 return __extension__ (__m512d)
89 { __H, __G, __F, __E, __D, __C, __B, __A };
90 }
91
92 extern __inline __m512
93 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
94 _mm512_set_ps (float __A, float __B, float __C, float __D,
95 float __E, float __F, float __G, float __H,
96 float __I, float __J, float __K, float __L,
97 float __M, float __N, float __O, float __P)
98 {
99 return __extension__ (__m512)
100 { __P, __O, __N, __M, __L, __K, __J, __I,
101 __H, __G, __F, __E, __D, __C, __B, __A };
102 }
103
104 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
105 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
106
107 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
108 e8,e9,e10,e11,e12,e13,e14,e15) \
109 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
110
111 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
112 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
113
114 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
115 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
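/* A small sketch of the argument order (illustrative only): the _mm512_set_*
   functions take the highest-indexed element first, while the _mm512_setr_*
   macros above take element 0 first, so the two calls below build the same
   vector:

     __m512i x = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                   7, 6, 5, 4, 3, 2, 1, 0);
     __m512i y = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);

   In both results element 0 is 0 and element 15 is 15.  */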
116
117 extern __inline __m512
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 _mm512_setzero_ps (void)
120 {
121 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
122 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
123 }
124
125 extern __inline __m512d
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm512_setzero_pd (void)
128 {
129 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
130 }
131
132 extern __inline __m512i
133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
134 _mm512_setzero_si512 (void)
135 {
136 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
137 }
138
139 extern __inline __m512d
140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
142 {
143 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
144 (__v8df) __W,
145 (__mmask8) __U);
146 }
147
148 extern __inline __m512d
149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
150 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
151 {
152 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
153 (__v8df)
154 _mm512_setzero_pd (),
155 (__mmask8) __U);
156 }
157
158 extern __inline __m512
159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
160 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
161 {
162 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
163 (__v16sf) __W,
164 (__mmask16) __U);
165 }
166
167 extern __inline __m512
168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
170 {
171 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
172 (__v16sf)
173 _mm512_setzero_ps (),
174 (__mmask16) __U);
175 }
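/* Sketch of the merge- and zero-masking convention used throughout this
   header (illustrative only): bit i of the mask selects element i of the
   source; a clear bit keeps the old value from __W in the _mask_ form and
   produces zero in the _maskz_ form.  Result comments list element 0 first:

     __m512d old = _mm512_set_pd (8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
     __m512d src = _mm512_setzero_pd ();
     __mmask8 m = 0x0F;                                  // elements 0..3
     __m512d merged = _mm512_mask_mov_pd (old, m, src);  // {0,0,0,0,5,6,7,8}
     __m512d zeroed = _mm512_maskz_mov_pd (m, src);      // all zeros
*/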
176
177 extern __inline __m512d
178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179 _mm512_load_pd (void const *__P)
180 {
181 return *(__m512d *) __P;
182 }
183
184 extern __inline __m512d
185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
187 {
188 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
189 (__v8df) __W,
190 (__mmask8) __U);
191 }
192
193 extern __inline __m512d
194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
195 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
196 {
197 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
198 (__v8df)
199 _mm512_setzero_pd (),
200 (__mmask8) __U);
201 }
202
203 extern __inline void
204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
205 _mm512_store_pd (void *__P, __m512d __A)
206 {
207 *(__m512d *) __P = __A;
208 }
209
210 extern __inline void
211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
213 {
214 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
215 (__mmask8) __U);
216 }
217
218 extern __inline __m512
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_load_ps (void const *__P)
221 {
222 return *(__m512 *) __P;
223 }
224
225 extern __inline __m512
226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
227 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
228 {
229 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
230 (__v16sf) __W,
231 (__mmask16) __U);
232 }
233
234 extern __inline __m512
235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
236 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
237 {
238 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
239 (__v16sf)
240 _mm512_setzero_ps (),
241 (__mmask16) __U);
242 }
243
244 extern __inline void
245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
246 _mm512_store_ps (void *__P, __m512 __A)
247 {
248 *(__m512 *) __P = __A;
249 }
250
251 extern __inline void
252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
253 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
254 {
255 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
256 (__mmask16) __U);
257 }
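/* Sketch of the load/store intrinsics above (illustrative only):
   _mm512_load_pd / _mm512_store_pd expect a 64-byte aligned address, and the
   masked store writes only the selected elements, leaving the rest of the
   buffer untouched.  The aligned attribute is GNU C:

     double buf[8] __attribute__ ((aligned (64))) =
       { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 };
     __m512d v = _mm512_load_pd (buf);
     v = _mm512_maskz_mov_pd ((__mmask8) 0xF0, v);    // keep elements 4..7
     _mm512_mask_store_pd (buf, (__mmask8) 0xF0, v);  // buf[0..3] unchanged
*/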
258
259 extern __inline __m512i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
262 {
263 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
264 (__v8di) __W,
265 (__mmask8) __U);
266 }
267
268 extern __inline __m512i
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
271 {
272 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
273 (__v8di)
274 _mm512_setzero_si512 (),
275 (__mmask8) __U);
276 }
277
278 extern __inline __m512i
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 _mm512_load_epi64 (void const *__P)
281 {
282 return *(__m512i *) __P;
283 }
284
285 extern __inline __m512i
286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
288 {
289 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
290 (__v8di) __W,
291 (__mmask8) __U);
292 }
293
294 extern __inline __m512i
295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
296 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
297 {
298 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
299 (__v8di)
300 _mm512_setzero_si512 (),
301 (__mmask8) __U);
302 }
303
304 extern __inline void
305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
306 _mm512_store_epi64 (void *__P, __m512i __A)
307 {
308 *(__m512i *) __P = __A;
309 }
310
311 extern __inline void
312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
314 {
315 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
316 (__mmask8) __U);
317 }
318
319 extern __inline __m512i
320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
321 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
322 {
323 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
324 (__v16si) __W,
325 (__mmask16) __U);
326 }
327
328 extern __inline __m512i
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
331 {
332 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
333 (__v16si)
334 _mm512_setzero_si512 (),
335 (__mmask16) __U);
336 }
337
338 extern __inline __m512i
339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
340 _mm512_load_si512 (void const *__P)
341 {
342 return *(__m512i *) __P;
343 }
344
345 extern __inline __m512i
346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347 _mm512_load_epi32 (void const *__P)
348 {
349 return *(__m512i *) __P;
350 }
351
352 extern __inline __m512i
353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
354 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
355 {
356 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
357 (__v16si) __W,
358 (__mmask16) __U);
359 }
360
361 extern __inline __m512i
362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
363 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
364 {
365 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
366 (__v16si)
367 _mm512_setzero_si512 (),
368 (__mmask16) __U);
369 }
370
371 extern __inline void
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_store_si512 (void *__P, __m512i __A)
374 {
375 *(__m512i *) __P = __A;
376 }
377
378 extern __inline void
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_store_epi32 (void *__P, __m512i __A)
381 {
382 *(__m512i *) __P = __A;
383 }
384
385 extern __inline void
386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
388 {
389 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
390 (__mmask16) __U);
391 }
392
393 extern __inline __m512i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
396 {
397 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
398 (__v16si) __B,
399 (__v16si)
400 _mm512_setzero_si512 (),
401 (__mmask16) -1);
402 }
403
404 extern __inline __m512i
405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
406 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
407 {
408 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
409 (__v16si) __B,
410 (__v16si)
411 _mm512_setzero_si512 (),
412 __M);
413 }
414
415 extern __inline __m512i
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
418 {
419 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
420 (__v16si) __B,
421 (__v16si) __W, __M);
422 }
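/* Sketch (illustrative only): _mm512_mullo_epi32 multiplies corresponding
   32-bit elements and keeps only the low 32 bits of each product, as VPMULLD
   does:

     __m512i a = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);
     __m512i b = _mm512_set_epi32 (2, 2, 2, 2, 2, 2, 2, 2,
                                   2, 2, 2, 2, 2, 2, 2, 2);
     __m512i p = _mm512_mullo_epi32 (a, b);   // element i holds 2*i
*/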
423
424 extern __inline __m512i
425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
427 {
428 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
429 (__v16si) __Y,
430 (__v16si)
431 _mm512_setzero_si512 (),
432 (__mmask16) -1);
433 }
434
435 extern __inline __m512i
436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
437 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
438 {
439 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
440 (__v16si) __Y,
441 (__v16si) __W,
442 (__mmask16) __U);
443 }
444
445 extern __inline __m512i
446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
448 {
449 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
450 (__v16si) __Y,
451 (__v16si)
452 _mm512_setzero_si512 (),
453 (__mmask16) __U);
454 }
455
456 extern __inline __m512i
457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
459 {
460 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
461 (__v16si) __Y,
462 (__v16si)
463 _mm512_setzero_si512 (),
464 (__mmask16) -1);
465 }
466
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
470 {
471 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
472 (__v16si) __Y,
473 (__v16si) __W,
474 (__mmask16) __U);
475 }
476
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
480 {
481 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
482 (__v16si) __Y,
483 (__v16si)
484 _mm512_setzero_si512 (),
485 (__mmask16) __U);
486 }
487
488 extern __inline __m512i
489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
491 {
492 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
493 (__v16si) __Y,
494 (__v16si)
495 _mm512_setzero_si512 (),
496 (__mmask16) -1);
497 }
498
499 extern __inline __m512i
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
502 {
503 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
504 (__v16si) __Y,
505 (__v16si) __W,
506 (__mmask16) __U);
507 }
508
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
512 {
513 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
514 (__v16si) __Y,
515 (__v16si)
516 _mm512_setzero_si512 (),
517 (__mmask16) __U);
518 }
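/* Sketch of the per-element variable shifts above (illustrative only): each
   element of the first operand is shifted by the count held in the matching
   element of the second operand; counts of 32 or more yield 0 for the
   logical forms (sllv/srlv) and a sign-fill for the arithmetic form (srav):

     __m512i v = _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1,
                                   1, 1, 1, 1, 1, 1, 1, 1);
     __m512i c = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);
     __m512i s = _mm512_sllv_epi32 (v, c);    // element i holds 1 << i
*/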
519
520 extern __inline __m512i
521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
522 _mm512_add_epi64 (__m512i __A, __m512i __B)
523 {
524 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
525 (__v8di) __B,
526 (__v8di)
527 _mm512_setzero_si512 (),
528 (__mmask8) -1);
529 }
530
531 extern __inline __m512i
532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
534 {
535 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
536 (__v8di) __B,
537 (__v8di) __W,
538 (__mmask8) __U);
539 }
540
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
544 {
545 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
546 (__v8di) __B,
547 (__v8di)
548 _mm512_setzero_si512 (),
549 (__mmask8) __U);
550 }
551
552 extern __inline __m512i
553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554 _mm512_sub_epi64 (__m512i __A, __m512i __B)
555 {
556 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
557 (__v8di) __B,
558 (__v8di)
559 _mm512_setzero_si512 (),
560 (__mmask8) -1);
561 }
562
563 extern __inline __m512i
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
566 {
567 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
568 (__v8di) __B,
569 (__v8di) __W,
570 (__mmask8) __U);
571 }
572
573 extern __inline __m512i
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
576 {
577 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
578 (__v8di) __B,
579 (__v8di)
580 _mm512_setzero_si512 (),
581 (__mmask8) __U);
582 }
583
584 extern __inline __m512i
585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
587 {
588 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
589 (__v8di) __Y,
590 (__v8di)
591 _mm512_setzero_si512 (),
592 (__mmask8) -1);
593 }
594
595 extern __inline __m512i
596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
598 {
599 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
600 (__v8di) __Y,
601 (__v8di) __W,
602 (__mmask8) __U);
603 }
604
605 extern __inline __m512i
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
608 {
609 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
610 (__v8di) __Y,
611 (__v8di)
612 _mm512_setzero_si512 (),
613 (__mmask8) __U);
614 }
615
616 extern __inline __m512i
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
619 {
620 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
621 (__v8di) __Y,
622 (__v8di)
623 _mm512_setzero_si512 (),
624 (__mmask8) -1);
625 }
626
627 extern __inline __m512i
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
630 {
631 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
632 (__v8di) __Y,
633 (__v8di) __W,
634 (__mmask8) __U);
635 }
636
637 extern __inline __m512i
638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
639 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
640 {
641 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
642 (__v8di) __Y,
643 (__v8di)
644 _mm512_setzero_si512 (),
645 (__mmask8) __U);
646 }
647
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
651 {
652 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
653 (__v8di) __Y,
654 (__v8di)
655 _mm512_setzero_si512 (),
656 (__mmask8) -1);
657 }
658
659 extern __inline __m512i
660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
661 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
662 {
663 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
664 (__v8di) __Y,
665 (__v8di) __W,
666 (__mmask8) __U);
667 }
668
669 extern __inline __m512i
670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
672 {
673 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
674 (__v8di) __Y,
675 (__v8di)
676 _mm512_setzero_si512 (),
677 (__mmask8) __U);
678 }
679
680 extern __inline __m512i
681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
682 _mm512_add_epi32 (__m512i __A, __m512i __B)
683 {
684 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
685 (__v16si) __B,
686 (__v16si)
687 _mm512_setzero_si512 (),
688 (__mmask16) -1);
689 }
690
691 extern __inline __m512i
692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
693 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
694 {
695 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
696 (__v16si) __B,
697 (__v16si) __W,
698 (__mmask16) __U);
699 }
700
701 extern __inline __m512i
702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
704 {
705 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
706 (__v16si) __B,
707 (__v16si)
708 _mm512_setzero_si512 (),
709 (__mmask16) __U);
710 }
711
712 extern __inline __m512i
713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
714 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
715 {
716 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
717 (__v16si) __Y,
718 (__v8di)
719 _mm512_setzero_si512 (),
720 (__mmask8) -1);
721 }
722
723 extern __inline __m512i
724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
726 {
727 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
728 (__v16si) __Y,
729 (__v8di) __W, __M);
730 }
731
732 extern __inline __m512i
733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
734 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
735 {
736 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
737 (__v16si) __Y,
738 (__v8di)
739 _mm512_setzero_si512 (),
740 __M);
741 }
742
743 extern __inline __m512i
744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
745 _mm512_sub_epi32 (__m512i __A, __m512i __B)
746 {
747 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
748 (__v16si) __B,
749 (__v16si)
750 _mm512_setzero_si512 (),
751 (__mmask16) -1);
752 }
753
754 extern __inline __m512i
755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
756 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
757 {
758 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
759 (__v16si) __B,
760 (__v16si) __W,
761 (__mmask16) __U);
762 }
763
764 extern __inline __m512i
765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
767 {
768 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
769 (__v16si) __B,
770 (__v16si)
771 _mm512_setzero_si512 (),
772 (__mmask16) __U);
773 }
774
775 extern __inline __m512i
776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
778 {
779 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
780 (__v16si) __Y,
781 (__v8di)
782 _mm512_setzero_si512 (),
783 (__mmask8) -1);
784 }
785
786 extern __inline __m512i
787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
789 {
790 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
791 (__v16si) __Y,
792 (__v8di) __W, __M);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
798 {
799 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
800 (__v16si) __Y,
801 (__v8di)
802 _mm512_setzero_si512 (),
803 __M);
804 }
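/* Sketch (illustrative only): _mm512_mul_epi32 / _mm512_mul_epu32 do not
   multiply all sixteen 32-bit elements; like VPMULDQ/VPMULUDQ they take the
   low (even-indexed) 32-bit element of each 64-bit lane and produce eight
   full 64-bit products, signed and unsigned respectively:

     __m512i a = _mm512_setr_epi32 (-1, 0, -1, 0, -1, 0, -1, 0,
                                    -1, 0, -1, 0, -1, 0, -1, 0);
     __m512i b = _mm512_setr_epi32 (3, 0, 3, 0, 3, 0, 3, 0,
                                    3, 0, 3, 0, 3, 0, 3, 0);
     __m512i s = _mm512_mul_epi32 (a, b);   // eight copies of -3 (64-bit)
     __m512i u = _mm512_mul_epu32 (a, b);   // eight copies of 3 * 0xFFFFFFFF
*/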
805
806 #ifdef __OPTIMIZE__
807 extern __inline __m512i
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
810 {
811 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
812 (__v8di)
813 _mm512_setzero_si512 (),
814 (__mmask8) -1);
815 }
816
817 extern __inline __m512i
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
820 unsigned int __B)
821 {
822 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
823 (__v8di) __W,
824 (__mmask8) __U);
825 }
826
827 extern __inline __m512i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
830 {
831 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
832 (__v8di)
833 _mm512_setzero_si512 (),
834 (__mmask8) __U);
835 }
836 #else
837 #define _mm512_slli_epi64(X, C) \
838 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
839 (__v8di)(__m512i)_mm512_setzero_si512 (),\
840 (__mmask8)-1))
841
842 #define _mm512_mask_slli_epi64(W, U, X, C) \
843 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
844 (__v8di)(__m512i)(W),\
845 (__mmask8)(U)))
846
847 #define _mm512_maskz_slli_epi64(U, X, C) \
848 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
849 (__v8di)(__m512i)_mm512_setzero_si512 (),\
850 (__mmask8)(U)))
851 #endif
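/* Note on the __OPTIMIZE__ split above (and in the later immediate-count and
   rounding blocks): it follows the usual GCC intrinsics pattern.  With
   optimization enabled, the always_inline functions let a constant count
   propagate into the builtin; without optimization, the macro forms pass the
   literal through directly so the builtin still sees it as written.  A
   typical call looks the same either way:

     __m512i x = _mm512_set_epi64 (8, 7, 6, 5, 4, 3, 2, 1);
     __m512i y = _mm512_slli_epi64 (x, 3);   // each element multiplied by 8
*/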
852
853 extern __inline __m512i
854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
855 _mm512_sll_epi64 (__m512i __A, __m128i __B)
856 {
857 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
858 (__v2di) __B,
859 (__v8di)
860 _mm512_setzero_si512 (),
861 (__mmask8) -1);
862 }
863
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
867 {
868 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
869 (__v2di) __B,
870 (__v8di) __W,
871 (__mmask8) __U);
872 }
873
874 extern __inline __m512i
875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
876 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
877 {
878 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
879 (__v2di) __B,
880 (__v8di)
881 _mm512_setzero_si512 (),
882 (__mmask8) __U);
883 }
884
885 #ifdef __OPTIMIZE__
886 extern __inline __m512i
887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
888 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
889 {
890 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
891 (__v8di)
892 _mm512_setzero_si512 (),
893 (__mmask8) -1);
894 }
895
896 extern __inline __m512i
897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
898 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
899 __m512i __A, unsigned int __B)
900 {
901 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
902 (__v8di) __W,
903 (__mmask8) __U);
904 }
905
906 extern __inline __m512i
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
909 {
910 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
911 (__v8di)
912 _mm512_setzero_si512 (),
913 (__mmask8) __U);
914 }
915 #else
916 #define _mm512_srli_epi64(X, C) \
917 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
918 (__v8di)(__m512i)_mm512_setzero_si512 (),\
919 (__mmask8)-1))
920
921 #define _mm512_mask_srli_epi64(W, U, X, C) \
922 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
923 (__v8di)(__m512i)(W),\
924 (__mmask8)(U)))
925
926 #define _mm512_maskz_srli_epi64(U, X, C) \
927 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
928 (__v8di)(__m512i)_mm512_setzero_si512 (),\
929 (__mmask8)(U)))
930 #endif
931
932 extern __inline __m512i
933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934 _mm512_srl_epi64 (__m512i __A, __m128i __B)
935 {
936 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
937 (__v2di) __B,
938 (__v8di)
939 _mm512_setzero_si512 (),
940 (__mmask8) -1);
941 }
942
943 extern __inline __m512i
944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
945 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
946 {
947 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
948 (__v2di) __B,
949 (__v8di) __W,
950 (__mmask8) __U);
951 }
952
953 extern __inline __m512i
954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
956 {
957 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
958 (__v2di) __B,
959 (__v8di)
960 _mm512_setzero_si512 (),
961 (__mmask8) __U);
962 }
963
964 #ifdef __OPTIMIZE__
965 extern __inline __m512i
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
968 {
969 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
970 (__v8di)
971 _mm512_setzero_si512 (),
972 (__mmask8) -1);
973 }
974
975 extern __inline __m512i
976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
977 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
978 unsigned int __B)
979 {
980 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
981 (__v8di) __W,
982 (__mmask8) __U);
983 }
984
985 extern __inline __m512i
986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
988 {
989 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
990 (__v8di)
991 _mm512_setzero_si512 (),
992 (__mmask8) __U);
993 }
994 #else
995 #define _mm512_srai_epi64(X, C) \
996 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
997 (__v8di)(__m512i)_mm512_setzero_si512 (),\
998 (__mmask8)-1))
999
1000 #define _mm512_mask_srai_epi64(W, U, X, C) \
1001 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1002 (__v8di)(__m512i)(W),\
1003 (__mmask8)(U)))
1004
1005 #define _mm512_maskz_srai_epi64(U, X, C) \
1006 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1007 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1008 (__mmask8)(U)))
1009 #endif
1010
1011 extern __inline __m512i
1012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1014 {
1015 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1016 (__v2di) __B,
1017 (__v8di)
1018 _mm512_setzero_si512 (),
1019 (__mmask8) -1);
1020 }
1021
1022 extern __inline __m512i
1023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1025 {
1026 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1027 (__v2di) __B,
1028 (__v8di) __W,
1029 (__mmask8) __U);
1030 }
1031
1032 extern __inline __m512i
1033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1034 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1035 {
1036 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1037 (__v2di) __B,
1038 (__v8di)
1039 _mm512_setzero_si512 (),
1040 (__mmask8) __U);
1041 }
1042
1043 #ifdef __OPTIMIZE__
1044 extern __inline __m512i
1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1047 {
1048 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1049 (__v16si)
1050 _mm512_setzero_si512 (),
1051 (__mmask16) -1);
1052 }
1053
1054 extern __inline __m512i
1055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1056 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1057 unsigned int __B)
1058 {
1059 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1060 (__v16si) __W,
1061 (__mmask16) __U);
1062 }
1063
1064 extern __inline __m512i
1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1067 {
1068 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1069 (__v16si)
1070 _mm512_setzero_si512 (),
1071 (__mmask16) __U);
1072 }
1073 #else
1074 #define _mm512_slli_epi32(X, C) \
1075 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1076 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1077 (__mmask16)-1))
1078
1079 #define _mm512_mask_slli_epi32(W, U, X, C) \
1080 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1081 (__v16si)(__m512i)(W),\
1082 (__mmask16)(U)))
1083
1084 #define _mm512_maskz_slli_epi32(U, X, C) \
1085 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1086 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1087 (__mmask16)(U)))
1088 #endif
1089
1090 extern __inline __m512i
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1093 {
1094 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1095 (__v4si) __B,
1096 (__v16si)
1097 _mm512_setzero_si512 (),
1098 (__mmask16) -1);
1099 }
1100
1101 extern __inline __m512i
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1104 {
1105 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1106 (__v4si) __B,
1107 (__v16si) __W,
1108 (__mmask16) __U);
1109 }
1110
1111 extern __inline __m512i
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1114 {
1115 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1116 (__v4si) __B,
1117 (__v16si)
1118 _mm512_setzero_si512 (),
1119 (__mmask16) __U);
1120 }
1121
1122 #ifdef __OPTIMIZE__
1123 extern __inline __m512i
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1126 {
1127 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1128 (__v16si)
1129 _mm512_setzero_si512 (),
1130 (__mmask16) -1);
1131 }
1132
1133 extern __inline __m512i
1134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1135 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1136 __m512i __A, unsigned int __B)
1137 {
1138 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1139 (__v16si) __W,
1140 (__mmask16) __U);
1141 }
1142
1143 extern __inline __m512i
1144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1145 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1146 {
1147 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1148 (__v16si)
1149 _mm512_setzero_si512 (),
1150 (__mmask16) __U);
1151 }
1152 #else
1153 #define _mm512_srli_epi32(X, C) \
1154 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1155 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1156 (__mmask16)-1))
1157
1158 #define _mm512_mask_srli_epi32(W, U, X, C) \
1159 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1160 (__v16si)(__m512i)(W),\
1161 (__mmask16)(U)))
1162
1163 #define _mm512_maskz_srli_epi32(U, X, C) \
1164 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1165 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1166 (__mmask16)(U)))
1167 #endif
1168
1169 extern __inline __m512i
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1172 {
1173 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1174 (__v4si) __B,
1175 (__v16si)
1176 _mm512_setzero_si512 (),
1177 (__mmask16) -1);
1178 }
1179
1180 extern __inline __m512i
1181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1183 {
1184 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1185 (__v4si) __B,
1186 (__v16si) __W,
1187 (__mmask16) __U);
1188 }
1189
1190 extern __inline __m512i
1191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1192 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1193 {
1194 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1195 (__v4si) __B,
1196 (__v16si)
1197 _mm512_setzero_si512 (),
1198 (__mmask16) __U);
1199 }
1200
1201 #ifdef __OPTIMIZE__
1202 extern __inline __m512i
1203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1205 {
1206 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1207 (__v16si)
1208 _mm512_setzero_si512 (),
1209 (__mmask16) -1);
1210 }
1211
1212 extern __inline __m512i
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1215 unsigned int __B)
1216 {
1217 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1218 (__v16si) __W,
1219 (__mmask16) __U);
1220 }
1221
1222 extern __inline __m512i
1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1224 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1225 {
1226 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1227 (__v16si)
1228 _mm512_setzero_si512 (),
1229 (__mmask16) __U);
1230 }
1231 #else
1232 #define _mm512_srai_epi32(X, C) \
1233 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1234 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1235 (__mmask16)-1))
1236
1237 #define _mm512_mask_srai_epi32(W, U, X, C) \
1238 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1239 (__v16si)(__m512i)(W),\
1240 (__mmask16)(U)))
1241
1242 #define _mm512_maskz_srai_epi32(U, X, C) \
1243 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1244 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1245 (__mmask16)(U)))
1246 #endif
1247
1248 extern __inline __m512i
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1251 {
1252 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1253 (__v4si) __B,
1254 (__v16si)
1255 _mm512_setzero_si512 (),
1256 (__mmask16) -1);
1257 }
1258
1259 extern __inline __m512i
1260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1261 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1262 {
1263 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1264 (__v4si) __B,
1265 (__v16si) __W,
1266 (__mmask16) __U);
1267 }
1268
1269 extern __inline __m512i
1270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1271 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1272 {
1273 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1274 (__v4si) __B,
1275 (__v16si)
1276 _mm512_setzero_si512 (),
1277 (__mmask16) __U);
1278 }
1279
1280 #ifdef __OPTIMIZE__
1281 extern __inline __m128d
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1284 {
1285 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1286 (__v2df) __B,
1287 __R);
1288 }
1289
1290 extern __inline __m128
1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1293 {
1294 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1295 (__v4sf) __B,
1296 __R);
1297 }
1298
1299 extern __inline __m128d
1300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1301 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1302 {
1303 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1304 (__v2df) __B,
1305 __R);
1306 }
1307
1308 extern __inline __m128
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1311 {
1312 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1313 (__v4sf) __B,
1314 __R);
1315 }
1316
1317 #else
1318 #define _mm_add_round_sd(A, B, C) \
1319 (__m128d)__builtin_ia32_addsd_round(A, B, C)
1320
1321 #define _mm_add_round_ss(A, B, C) \
1322 (__m128)__builtin_ia32_addss_round(A, B, C)
1323
1324 #define _mm_sub_round_sd(A, B, C) \
1325 (__m128d)__builtin_ia32_subsd_round(A, B, C)
1326
1327 #define _mm_sub_round_ss(A, B, C) \
1328 (__m128)__builtin_ia32_subss_round(A, B, C)
1329 #endif
1330
1331 #ifdef __OPTIMIZE__
1332 extern __inline __m512i
1333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1334 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1335 {
1336 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1337 (__v8di) __B,
1338 (__v8di) __C, imm,
1339 (__mmask8) -1);
1340 }
1341
1342 extern __inline __m512i
1343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1344 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1345 __m512i __C, const int imm)
1346 {
1347 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1348 (__v8di) __B,
1349 (__v8di) __C, imm,
1350 (__mmask8) __U);
1351 }
1352
1353 extern __inline __m512i
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1356 __m512i __C, const int imm)
1357 {
1358 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1359 (__v8di) __B,
1360 (__v8di) __C,
1361 imm, (__mmask8) __U);
1362 }
1363
1364 extern __inline __m512i
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1367 {
1368 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1369 (__v16si) __B,
1370 (__v16si) __C,
1371 imm, (__mmask16) -1);
1372 }
1373
1374 extern __inline __m512i
1375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1377 __m512i __C, const int imm)
1378 {
1379 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1380 (__v16si) __B,
1381 (__v16si) __C,
1382 imm, (__mmask16) __U);
1383 }
1384
1385 extern __inline __m512i
1386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1387 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1388 __m512i __C, const int imm)
1389 {
1390 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1391 (__v16si) __B,
1392 (__v16si) __C,
1393 imm, (__mmask16) __U);
1394 }
1395 #else
1396 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1397 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1398 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1399 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1400 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1401 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1402 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1403 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1404 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1405 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1406 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1407 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1408 (__mmask16)-1))
1409 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1410 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1411 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1412 (__mmask16)(U)))
1413 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1414 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1415 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1416 (__mmask16)(U)))
1417 #endif
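/* Sketch of the ternary-logic immediate (illustrative only): for every bit
   position, the bits taken from __A, __B and __C form a 3-bit index (with
   the __A bit most significant) and the corresponding bit of the immediate
   supplies the result, per the usual VPTERNLOG convention.  Two common
   constants under that convention are 0x96 (A XOR B XOR C) and 0xCA
   (bitwise select, (A & B) | (~A & C)):

     __m512i a = _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, 0x0F);
     __m512i b = _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, 0x33);
     __m512i c = _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, 0x55);
     __m512i x = _mm512_ternarylogic_epi64 (a, b, c, 0x96);  // low qword 0x69
     __m512i s = _mm512_ternarylogic_epi64 (a, b, c, 0xCA);  // low qword 0x53
*/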
1418
1419 extern __inline __m512d
1420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1421 _mm512_rcp14_pd (__m512d __A)
1422 {
1423 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1424 (__v8df)
1425 _mm512_setzero_pd (),
1426 (__mmask8) -1);
1427 }
1428
1429 extern __inline __m512d
1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1432 {
1433 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1434 (__v8df) __W,
1435 (__mmask8) __U);
1436 }
1437
1438 extern __inline __m512d
1439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1440 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1441 {
1442 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1443 (__v8df)
1444 _mm512_setzero_pd (),
1445 (__mmask8) __U);
1446 }
1447
1448 extern __inline __m512
1449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1450 _mm512_rcp14_ps (__m512 __A)
1451 {
1452 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1453 (__v16sf)
1454 _mm512_setzero_ps (),
1455 (__mmask16) -1);
1456 }
1457
1458 extern __inline __m512
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1461 {
1462 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1463 (__v16sf) __W,
1464 (__mmask16) __U);
1465 }
1466
1467 extern __inline __m512
1468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1469 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1470 {
1471 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1472 (__v16sf)
1473 _mm512_setzero_ps (),
1474 (__mmask16) __U);
1475 }
1476
1477 extern __inline __m128d
1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479 _mm_rcp14_sd (__m128d __A, __m128d __B)
1480 {
1481 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
1482 (__v2df) __B);
1483 }
1484
1485 extern __inline __m128
1486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487 _mm_rcp14_ss (__m128 __A, __m128 __B)
1488 {
1489 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
1490 (__v4sf) __B);
1491 }
1492
1493 extern __inline __m512d
1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495 _mm512_rsqrt14_pd (__m512d __A)
1496 {
1497 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1498 (__v8df)
1499 _mm512_setzero_pd (),
1500 (__mmask8) -1);
1501 }
1502
1503 extern __inline __m512d
1504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1505 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1506 {
1507 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1508 (__v8df) __W,
1509 (__mmask8) __U);
1510 }
1511
1512 extern __inline __m512d
1513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1514 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1515 {
1516 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1517 (__v8df)
1518 _mm512_setzero_pd (),
1519 (__mmask8) __U);
1520 }
1521
1522 extern __inline __m512
1523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1524 _mm512_rsqrt14_ps (__m512 __A)
1525 {
1526 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1527 (__v16sf)
1528 _mm512_setzero_ps (),
1529 (__mmask16) -1);
1530 }
1531
1532 extern __inline __m512
1533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1534 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1535 {
1536 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1537 (__v16sf) __W,
1538 (__mmask16) __U);
1539 }
1540
1541 extern __inline __m512
1542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1544 {
1545 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1546 (__v16sf)
1547 _mm512_setzero_ps (),
1548 (__mmask16) __U);
1549 }
1550
1551 extern __inline __m128d
1552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1554 {
1555 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
1556 (__v2df) __B);
1557 }
1558
1559 extern __inline __m128
1560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1562 {
1563 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
1564 (__v4sf) __B);
1565 }
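/* Sketch (illustrative only): the rcp14/rsqrt14 intrinsics above return
   approximations with a relative error of at most 2^-14.  When more
   precision is needed, one Newton-Raphson step is the usual refinement; the
   sketch assumes the _mm512_mul_pd and _mm512_sub_pd intrinsics defined
   later in this header:

     __m512d x0 = _mm512_rcp14_pd (a);                      // ~1/a
     __m512d t  = _mm512_sub_pd (_mm512_set_pd (2.0, 2.0, 2.0, 2.0,
                                                2.0, 2.0, 2.0, 2.0),
                                 _mm512_mul_pd (a, x0));
     __m512d x1 = _mm512_mul_pd (x0, t);                    // refined 1/a
*/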
1566
1567 #ifdef __OPTIMIZE__
1568 extern __inline __m512d
1569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1571 {
1572 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1573 (__v8df)
1574 _mm512_setzero_pd (),
1575 (__mmask8) -1, __R);
1576 }
1577
1578 extern __inline __m512d
1579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1580 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1581 const int __R)
1582 {
1583 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1584 (__v8df) __W,
1585 (__mmask8) __U, __R);
1586 }
1587
1588 extern __inline __m512d
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1591 {
1592 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1593 (__v8df)
1594 _mm512_setzero_pd (),
1595 (__mmask8) __U, __R);
1596 }
1597
1598 extern __inline __m512
1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1601 {
1602 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1603 (__v16sf)
1604 _mm512_setzero_ps (),
1605 (__mmask16) -1, __R);
1606 }
1607
1608 extern __inline __m512
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1611 {
1612 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1613 (__v16sf) __W,
1614 (__mmask16) __U, __R);
1615 }
1616
1617 extern __inline __m512
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1620 {
1621 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1622 (__v16sf)
1623 _mm512_setzero_ps (),
1624 (__mmask16) __U, __R);
1625 }
1626
1627 extern __inline __m128d
1628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1630 {
1631 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1632 (__v2df) __A,
1633 __R);
1634 }
1635
1636 extern __inline __m128
1637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1638 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1639 {
1640 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1641 (__v4sf) __A,
1642 __R);
1643 }
1644 #else
1645 #define _mm512_sqrt_round_pd(A, C) \
1646 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
1647
1648 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
1649 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
1650
1651 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
1652 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
1653
1654 #define _mm512_sqrt_round_ps(A, C) \
1655 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
1656
1657 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
1658 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
1659
1660 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
1661 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
1662
1663 #define _mm_sqrt_round_sd(A, B, C) \
1664 (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
1665
1666 #define _mm_sqrt_round_ss(A, B, C) \
1667 (__m128)__builtin_ia32_sqrtss_round(A, B, C)
1668 #endif
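/* Sketch of the explicit-rounding square roots above (illustrative only):

     __m512d v = _mm512_set_pd (64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0);
     __m512d r = _mm512_sqrt_round_pd (v, _MM_FROUND_CUR_DIRECTION);
     // r holds {1, 2, 3, 4, 5, 6, 7, 8} (element 0 first), rounded according
     // to the current MXCSR mode.

   Note that the scalar forms take the value to be rooted in __B and the
   upper-element pass-through in __A.  */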
1669
1670 extern __inline __m512i
1671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1672 _mm512_cvtepi8_epi32 (__m128i __A)
1673 {
1674 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1675 (__v16si)
1676 _mm512_setzero_si512 (),
1677 (__mmask16) -1);
1678 }
1679
1680 extern __inline __m512i
1681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1682 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1683 {
1684 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1685 (__v16si) __W,
1686 (__mmask16) __U);
1687 }
1688
1689 extern __inline __m512i
1690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1691 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1692 {
1693 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1694 (__v16si)
1695 _mm512_setzero_si512 (),
1696 (__mmask16) __U);
1697 }
1698
1699 extern __inline __m512i
1700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1701 _mm512_cvtepi8_epi64 (__m128i __A)
1702 {
1703 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1704 (__v8di)
1705 _mm512_setzero_si512 (),
1706 (__mmask8) -1);
1707 }
1708
1709 extern __inline __m512i
1710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1711 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1712 {
1713 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1714 (__v8di) __W,
1715 (__mmask8) __U);
1716 }
1717
1718 extern __inline __m512i
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1721 {
1722 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1723 (__v8di)
1724 _mm512_setzero_si512 (),
1725 (__mmask8) __U);
1726 }
1727
1728 extern __inline __m512i
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm512_cvtepi16_epi32 (__m256i __A)
1731 {
1732 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1733 (__v16si)
1734 _mm512_setzero_si512 (),
1735 (__mmask16) -1);
1736 }
1737
1738 extern __inline __m512i
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1741 {
1742 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1743 (__v16si) __W,
1744 (__mmask16) __U);
1745 }
1746
1747 extern __inline __m512i
1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1750 {
1751 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1752 (__v16si)
1753 _mm512_setzero_si512 (),
1754 (__mmask16) __U);
1755 }
1756
1757 extern __inline __m512i
1758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1759 _mm512_cvtepi16_epi64 (__m128i __A)
1760 {
1761 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1762 (__v8di)
1763 _mm512_setzero_si512 (),
1764 (__mmask8) -1);
1765 }
1766
1767 extern __inline __m512i
1768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1769 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1770 {
1771 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1772 (__v8di) __W,
1773 (__mmask8) __U);
1774 }
1775
1776 extern __inline __m512i
1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1779 {
1780 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1781 (__v8di)
1782 _mm512_setzero_si512 (),
1783 (__mmask8) __U);
1784 }
1785
1786 extern __inline __m512i
1787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788 _mm512_cvtepi32_epi64 (__m256i __X)
1789 {
1790 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1791 (__v8di)
1792 _mm512_setzero_si512 (),
1793 (__mmask8) -1);
1794 }
1795
1796 extern __inline __m512i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1799 {
1800 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1801 (__v8di) __W,
1802 (__mmask8) __U);
1803 }
1804
1805 extern __inline __m512i
1806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1807 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1808 {
1809 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1810 (__v8di)
1811 _mm512_setzero_si512 (),
1812 (__mmask8) __U);
1813 }
1814
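/* Zero-extending conversions (VPMOVZXBD/BQ/WD/WQ/DQ): widen packed
   unsigned 8-, 16- or 32-bit integers to 32- or 64-bit elements.  */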
1815 extern __inline __m512i
1816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817 _mm512_cvtepu8_epi32 (__m128i __A)
1818 {
1819 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1820 (__v16si)
1821 _mm512_setzero_si512 (),
1822 (__mmask16) -1);
1823 }
1824
1825 extern __inline __m512i
1826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1827 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1828 {
1829 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1830 (__v16si) __W,
1831 (__mmask16) __U);
1832 }
1833
1834 extern __inline __m512i
1835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1837 {
1838 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1839 (__v16si)
1840 _mm512_setzero_si512 (),
1841 (__mmask16) __U);
1842 }
1843
1844 extern __inline __m512i
1845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846 _mm512_cvtepu8_epi64 (__m128i __A)
1847 {
1848 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1849 (__v8di)
1850 _mm512_setzero_si512 (),
1851 (__mmask8) -1);
1852 }
1853
1854 extern __inline __m512i
1855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1857 {
1858 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1859 (__v8di) __W,
1860 (__mmask8) __U);
1861 }
1862
1863 extern __inline __m512i
1864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1865 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1866 {
1867 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1868 (__v8di)
1869 _mm512_setzero_si512 (),
1870 (__mmask8) __U);
1871 }
1872
1873 extern __inline __m512i
1874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1875 _mm512_cvtepu16_epi32 (__m256i __A)
1876 {
1877 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1878 (__v16si)
1879 _mm512_setzero_si512 (),
1880 (__mmask16) -1);
1881 }
1882
1883 extern __inline __m512i
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1886 {
1887 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1888 (__v16si) __W,
1889 (__mmask16) __U);
1890 }
1891
1892 extern __inline __m512i
1893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1894 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
1895 {
1896 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1897 (__v16si)
1898 _mm512_setzero_si512 (),
1899 (__mmask16) __U);
1900 }
1901
1902 extern __inline __m512i
1903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1904 _mm512_cvtepu16_epi64 (__m128i __A)
1905 {
1906 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1907 (__v8di)
1908 _mm512_setzero_si512 (),
1909 (__mmask8) -1);
1910 }
1911
1912 extern __inline __m512i
1913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1915 {
1916 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1917 (__v8di) __W,
1918 (__mmask8) __U);
1919 }
1920
1921 extern __inline __m512i
1922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
1924 {
1925 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1926 (__v8di)
1927 _mm512_setzero_si512 (),
1928 (__mmask8) __U);
1929 }
1930
1931 extern __inline __m512i
1932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1933 _mm512_cvtepu32_epi64 (__m256i __X)
1934 {
1935 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1936 (__v8di)
1937 _mm512_setzero_si512 (),
1938 (__mmask8) -1);
1939 }
1940
1941 extern __inline __m512i
1942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1943 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1944 {
1945 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1946 (__v8di) __W,
1947 (__mmask8) __U);
1948 }
1949
1950 extern __inline __m512i
1951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
1953 {
1954 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1955 (__v8di)
1956 _mm512_setzero_si512 (),
1957 (__mmask8) __U);
1958 }
1959
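/* Arithmetic with embedded rounding control.  The trailing rounding
   argument takes one of the _MM_FROUND_* values and must be a
   compile-time constant; when __OPTIMIZE__ is not defined the inline
   forms cannot guarantee that, so equivalent macros are provided in
   the #else branch.  For illustration only, a typical call would be
   _mm512_add_round_pd (__a, __b, _MM_FROUND_CUR_DIRECTION).  */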
1960 #ifdef __OPTIMIZE__
1961 extern __inline __m512d
1962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1963 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
1964 {
1965 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1966 (__v8df) __B,
1967 (__v8df)
1968 _mm512_setzero_pd (),
1969 (__mmask8) -1, __R);
1970 }
1971
1972 extern __inline __m512d
1973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1974 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1975 __m512d __B, const int __R)
1976 {
1977 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1978 (__v8df) __B,
1979 (__v8df) __W,
1980 (__mmask8) __U, __R);
1981 }
1982
1983 extern __inline __m512d
1984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1985 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
1986 const int __R)
1987 {
1988 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1989 (__v8df) __B,
1990 (__v8df)
1991 _mm512_setzero_pd (),
1992 (__mmask8) __U, __R);
1993 }
1994
1995 extern __inline __m512
1996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1997 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
1998 {
1999 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2000 (__v16sf) __B,
2001 (__v16sf)
2002 _mm512_setzero_ps (),
2003 (__mmask16) -1, __R);
2004 }
2005
2006 extern __inline __m512
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2009 __m512 __B, const int __R)
2010 {
2011 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2012 (__v16sf) __B,
2013 (__v16sf) __W,
2014 (__mmask16) __U, __R);
2015 }
2016
2017 extern __inline __m512
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2020 {
2021 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2022 (__v16sf) __B,
2023 (__v16sf)
2024 _mm512_setzero_ps (),
2025 (__mmask16) __U, __R);
2026 }
2027
2028 extern __inline __m512d
2029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2030 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2031 {
2032 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2033 (__v8df) __B,
2034 (__v8df)
2035 _mm512_setzero_pd (),
2036 (__mmask8) -1, __R);
2037 }
2038
2039 extern __inline __m512d
2040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2041 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2042 __m512d __B, const int __R)
2043 {
2044 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2045 (__v8df) __B,
2046 (__v8df) __W,
2047 (__mmask8) __U, __R);
2048 }
2049
2050 extern __inline __m512d
2051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2052 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2053 const int __R)
2054 {
2055 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2056 (__v8df) __B,
2057 (__v8df)
2058 _mm512_setzero_pd (),
2059 (__mmask8) __U, __R);
2060 }
2061
2062 extern __inline __m512
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2065 {
2066 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2067 (__v16sf) __B,
2068 (__v16sf)
2069 _mm512_setzero_ps (),
2070 (__mmask16) -1, __R);
2071 }
2072
2073 extern __inline __m512
2074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2076 __m512 __B, const int __R)
2077 {
2078 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2079 (__v16sf) __B,
2080 (__v16sf) __W,
2081 (__mmask16) __U, __R);
2082 }
2083
2084 extern __inline __m512
2085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2087 {
2088 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2089 (__v16sf) __B,
2090 (__v16sf)
2091 _mm512_setzero_ps (),
2092 (__mmask16) __U, __R);
2093 }
2094 #else
2095 #define _mm512_add_round_pd(A, B, C) \
2096 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2097
2098 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2099 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2100
2101 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2102 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2103
2104 #define _mm512_add_round_ps(A, B, C) \
2105 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2106
2107 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2108 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2109
2110 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2111 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2112
2113 #define _mm512_sub_round_pd(A, B, C) \
2114 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2115
2116 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2117 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2118
2119 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2120 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2121
2122 #define _mm512_sub_round_ps(A, B, C) \
2123 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2124
2125 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2126 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2127
2128 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2129 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2130 #endif
2131
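/* Multiplication and division with rounding control, including the
   scalar forms _mm_mul_round_sd/_ss and _mm_div_round_sd/_ss.  */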
2132 #ifdef __OPTIMIZE__
2133 extern __inline __m512d
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2136 {
2137 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
2140 _mm512_setzero_pd (),
2141 (__mmask8) -1, __R);
2142 }
2143
2144 extern __inline __m512d
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2148 {
2149 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2153 }
2154
2155 extern __inline __m512d
2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2159 {
2160 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2165 }
2166
2167 extern __inline __m512
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2170 {
2171 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
2174 _mm512_setzero_ps (),
2175 (__mmask16) -1, __R);
2176 }
2177
2178 extern __inline __m512
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2182 {
2183 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2187 }
2188
2189 extern __inline __m512
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192 {
2193 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2198 }
2199
2200 extern __inline __m512d
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2203 {
2204 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2205 (__v8df) __V,
2206 (__v8df)
2207 _mm512_setzero_pd (),
2208 (__mmask8) -1, __R);
2209 }
2210
2211 extern __inline __m512d
2212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2213 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2214 __m512d __V, const int __R)
2215 {
2216 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2217 (__v8df) __V,
2218 (__v8df) __W,
2219 (__mmask8) __U, __R);
2220 }
2221
2222 extern __inline __m512d
2223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2225 const int __R)
2226 {
2227 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2228 (__v8df) __V,
2229 (__v8df)
2230 _mm512_setzero_pd (),
2231 (__mmask8) __U, __R);
2232 }
2233
2234 extern __inline __m512
2235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2236 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2237 {
2238 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2239 (__v16sf) __B,
2240 (__v16sf)
2241 _mm512_setzero_ps (),
2242 (__mmask16) -1, __R);
2243 }
2244
2245 extern __inline __m512
2246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2247 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2248 __m512 __B, const int __R)
2249 {
2250 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2251 (__v16sf) __B,
2252 (__v16sf) __W,
2253 (__mmask16) __U, __R);
2254 }
2255
2256 extern __inline __m512
2257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2258 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2259 {
2260 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2261 (__v16sf) __B,
2262 (__v16sf)
2263 _mm512_setzero_ps (),
2264 (__mmask16) __U, __R);
2265 }
2266
2267 extern __inline __m128d
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2270 {
2271 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2272 (__v2df) __B,
2273 __R);
2274 }
2275
2276 extern __inline __m128
2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2279 {
2280 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2281 (__v4sf) __B,
2282 __R);
2283 }
2284
2285 extern __inline __m128d
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2288 {
2289 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2290 (__v2df) __B,
2291 __R);
2292 }
2293
2294 extern __inline __m128
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2297 {
2298 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2299 (__v4sf) __B,
2300 __R);
2301 }
2302
2303 #else
2304 #define _mm512_mul_round_pd(A, B, C) \
2305 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2306
2307 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2308 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2309
2310 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2311 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2312
2313 #define _mm512_mul_round_ps(A, B, C) \
2314 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2315
2316 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2317 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2318
2319 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2320 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2321
2322 #define _mm512_div_round_pd(A, B, C) \
2323 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2324
2325 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2326 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2327
2328 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2329 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2330
2331 #define _mm512_div_round_ps(A, B, C) \
2332 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2333
2334 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2335 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2336
2337 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2338 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2339
2340 #define _mm_mul_round_sd(A, B, C) \
2341 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2342
2343 #define _mm_mul_round_ss(A, B, C) \
2344 (__m128)__builtin_ia32_mulss_round(A, B, C)
2345
2346 #define _mm_div_round_sd(A, B, C) \
2347 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2348
2349 #define _mm_div_round_ss(A, B, C) \
2350 (__m128)__builtin_ia32_divss_round(A, B, C)
2351 #endif
2352
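/* Packed maximum and minimum.  These operations are exact, so the
   extra argument matters only for exception behaviour, not for the
   rounding mode.  */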
2353 #ifdef __OPTIMIZE__
2354 extern __inline __m512d
2355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2357 {
2358 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2359 (__v8df) __B,
2360 (__v8df)
2361 _mm512_setzero_pd (),
2362 (__mmask8) -1, __R);
2363 }
2364
2365 extern __inline __m512d
2366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2368 __m512d __B, const int __R)
2369 {
2370 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2371 (__v8df) __B,
2372 (__v8df) __W,
2373 (__mmask8) __U, __R);
2374 }
2375
2376 extern __inline __m512d
2377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2378 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2379 const int __R)
2380 {
2381 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2382 (__v8df) __B,
2383 (__v8df)
2384 _mm512_setzero_pd (),
2385 (__mmask8) __U, __R);
2386 }
2387
2388 extern __inline __m512
2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2391 {
2392 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2393 (__v16sf) __B,
2394 (__v16sf)
2395 _mm512_setzero_ps (),
2396 (__mmask16) -1, __R);
2397 }
2398
2399 extern __inline __m512
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2402 __m512 __B, const int __R)
2403 {
2404 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2405 (__v16sf) __B,
2406 (__v16sf) __W,
2407 (__mmask16) __U, __R);
2408 }
2409
2410 extern __inline __m512
2411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2412 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2413 {
2414 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2415 (__v16sf) __B,
2416 (__v16sf)
2417 _mm512_setzero_ps (),
2418 (__mmask16) __U, __R);
2419 }
2420
2421 extern __inline __m512d
2422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2423 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2424 {
2425 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2426 (__v8df) __B,
2427 (__v8df)
2428 _mm512_setzero_pd (),
2429 (__mmask8) -1, __R);
2430 }
2431
2432 extern __inline __m512d
2433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2435 __m512d __B, const int __R)
2436 {
2437 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2438 (__v8df) __B,
2439 (__v8df) __W,
2440 (__mmask8) __U, __R);
2441 }
2442
2443 extern __inline __m512d
2444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2446 const int __R)
2447 {
2448 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2449 (__v8df) __B,
2450 (__v8df)
2451 _mm512_setzero_pd (),
2452 (__mmask8) __U, __R);
2453 }
2454
2455 extern __inline __m512
2456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2457 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2458 {
2459 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2460 (__v16sf) __B,
2461 (__v16sf)
2462 _mm512_setzero_ps (),
2463 (__mmask16) -1, __R);
2464 }
2465
2466 extern __inline __m512
2467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2468 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2469 __m512 __B, const int __R)
2470 {
2471 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2472 (__v16sf) __B,
2473 (__v16sf) __W,
2474 (__mmask16) __U, __R);
2475 }
2476
2477 extern __inline __m512
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2480 {
2481 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2482 (__v16sf) __B,
2483 (__v16sf)
2484 _mm512_setzero_ps (),
2485 (__mmask16) __U, __R);
2486 }
2487 #else
2488 #define _mm512_max_round_pd(A, B, R) \
2489 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
2490
2491 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2492 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2493
2494 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2495 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2496
2497 #define _mm512_max_round_ps(A, B, R) \
2498 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
2499
2500 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2501 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2502
2503 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2504 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2505
2506 #define _mm512_min_round_pd(A, B, R) \
2507 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
2508
2509 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2510 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2511
2512 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2513 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2514
2515 #define _mm512_min_round_ps(A, B, R) \
2516 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
2517
2518 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2519 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2520
2521 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2522 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2523 #endif
2524
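/* VSCALEF: multiply each element of the first operand by 2 raised to
   the floor of the corresponding element of the second operand.  */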
2525 #ifdef __OPTIMIZE__
2526 extern __inline __m512d
2527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2529 {
2530 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
2533 _mm512_setzero_pd (),
2534 (__mmask8) -1, __R);
2535 }
2536
2537 extern __inline __m512d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2541 {
2542 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2546 }
2547
2548 extern __inline __m512d
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2552 {
2553 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2558 }
2559
2560 extern __inline __m512
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2563 {
2564 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
2567 _mm512_setzero_ps (),
2568 (__mmask16) -1, __R);
2569 }
2570
2571 extern __inline __m512
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2575 {
2576 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2580 }
2581
2582 extern __inline __m512
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2585 const int __R)
2586 {
2587 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2588 (__v16sf) __B,
2589 (__v16sf)
2590 _mm512_setzero_ps (),
2591 (__mmask16) __U, __R);
2592 }
2593
2594 extern __inline __m128d
2595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2597 {
2598 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2599 (__v2df) __B,
2600 __R);
2601 }
2602
2603 extern __inline __m128
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2606 {
2607 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2608 (__v4sf) __B,
2609 __R);
2610 }
2611 #else
2612 #define _mm512_scalef_round_pd(A, B, C) \
2613 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2614
2615 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2616 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
2617
2618 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
2619 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2620
2621 #define _mm512_scalef_round_ps(A, B, C) \
2622 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2623
2624 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2625 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
2626
2627 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
2628 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2629
2630 #define _mm_scalef_round_sd(A, B, C) \
2631 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
2632
2633 #define _mm_scalef_round_ss(A, B, C) \
2634 (__m128)__builtin_ia32_scalefss_round(A, B, C)
2635 #endif
2636
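/* Fused multiply-add family with rounding control: fmadd computes
   A * B + C, fmsub A * B - C, fnmadd -(A * B) + C and fnmsub
   -(A * B) - C; fmaddsub and fmsubadd alternate addition and
   subtraction between even and odd elements.  For illustration only,
   one step of a dot product could be written as
   __acc = _mm512_fmadd_round_pd (__x, __y, __acc, _MM_FROUND_CUR_DIRECTION);  */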
2637 #ifdef __OPTIMIZE__
2638 extern __inline __m512d
2639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2640 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2641 {
2642 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2643 (__v8df) __B,
2644 (__v8df) __C,
2645 (__mmask8) -1, __R);
2646 }
2647
2648 extern __inline __m512d
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2651 __m512d __C, const int __R)
2652 {
2653 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2654 (__v8df) __B,
2655 (__v8df) __C,
2656 (__mmask8) __U, __R);
2657 }
2658
2659 extern __inline __m512d
2660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2661 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2662 __mmask8 __U, const int __R)
2663 {
2664 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2665 (__v8df) __B,
2666 (__v8df) __C,
2667 (__mmask8) __U, __R);
2668 }
2669
2670 extern __inline __m512d
2671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2672 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2673 __m512d __C, const int __R)
2674 {
2675 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2676 (__v8df) __B,
2677 (__v8df) __C,
2678 (__mmask8) __U, __R);
2679 }
2680
2681 extern __inline __m512
2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2684 {
2685 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2686 (__v16sf) __B,
2687 (__v16sf) __C,
2688 (__mmask16) -1, __R);
2689 }
2690
2691 extern __inline __m512
2692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2693 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2694 __m512 __C, const int __R)
2695 {
2696 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2697 (__v16sf) __B,
2698 (__v16sf) __C,
2699 (__mmask16) __U, __R);
2700 }
2701
2702 extern __inline __m512
2703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2704 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2705 __mmask16 __U, const int __R)
2706 {
2707 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2708 (__v16sf) __B,
2709 (__v16sf) __C,
2710 (__mmask16) __U, __R);
2711 }
2712
2713 extern __inline __m512
2714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2716 __m512 __C, const int __R)
2717 {
2718 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2719 (__v16sf) __B,
2720 (__v16sf) __C,
2721 (__mmask16) __U, __R);
2722 }
2723
2724 extern __inline __m512d
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2727 {
2728 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2729 (__v8df) __B,
2730 -(__v8df) __C,
2731 (__mmask8) -1, __R);
2732 }
2733
2734 extern __inline __m512d
2735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2736 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2737 __m512d __C, const int __R)
2738 {
2739 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2740 (__v8df) __B,
2741 -(__v8df) __C,
2742 (__mmask8) __U, __R);
2743 }
2744
2745 extern __inline __m512d
2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2748 __mmask8 __U, const int __R)
2749 {
2750 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2751 (__v8df) __B,
2752 (__v8df) __C,
2753 (__mmask8) __U, __R);
2754 }
2755
2756 extern __inline __m512d
2757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2759 __m512d __C, const int __R)
2760 {
2761 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2762 (__v8df) __B,
2763 -(__v8df) __C,
2764 (__mmask8) __U, __R);
2765 }
2766
2767 extern __inline __m512
2768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2770 {
2771 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2772 (__v16sf) __B,
2773 -(__v16sf) __C,
2774 (__mmask16) -1, __R);
2775 }
2776
2777 extern __inline __m512
2778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2779 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2780 __m512 __C, const int __R)
2781 {
2782 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2783 (__v16sf) __B,
2784 -(__v16sf) __C,
2785 (__mmask16) __U, __R);
2786 }
2787
2788 extern __inline __m512
2789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2791 __mmask16 __U, const int __R)
2792 {
2793 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2794 (__v16sf) __B,
2795 (__v16sf) __C,
2796 (__mmask16) __U, __R);
2797 }
2798
2799 extern __inline __m512
2800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2801 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2802 __m512 __C, const int __R)
2803 {
2804 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2805 (__v16sf) __B,
2806 -(__v16sf) __C,
2807 (__mmask16) __U, __R);
2808 }
2809
2810 extern __inline __m512d
2811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2812 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2813 {
2814 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2815 (__v8df) __B,
2816 (__v8df) __C,
2817 (__mmask8) -1, __R);
2818 }
2819
2820 extern __inline __m512d
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2823 __m512d __C, const int __R)
2824 {
2825 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2826 (__v8df) __B,
2827 (__v8df) __C,
2828 (__mmask8) __U, __R);
2829 }
2830
2831 extern __inline __m512d
2832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2834 __mmask8 __U, const int __R)
2835 {
2836 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2837 (__v8df) __B,
2838 (__v8df) __C,
2839 (__mmask8) __U, __R);
2840 }
2841
2842 extern __inline __m512d
2843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2845 __m512d __C, const int __R)
2846 {
2847 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2848 (__v8df) __B,
2849 (__v8df) __C,
2850 (__mmask8) __U, __R);
2851 }
2852
2853 extern __inline __m512
2854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2855 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2856 {
2857 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2858 (__v16sf) __B,
2859 (__v16sf) __C,
2860 (__mmask16) -1, __R);
2861 }
2862
2863 extern __inline __m512
2864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2866 __m512 __C, const int __R)
2867 {
2868 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2869 (__v16sf) __B,
2870 (__v16sf) __C,
2871 (__mmask16) __U, __R);
2872 }
2873
2874 extern __inline __m512
2875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2877 __mmask16 __U, const int __R)
2878 {
2879 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2880 (__v16sf) __B,
2881 (__v16sf) __C,
2882 (__mmask16) __U, __R);
2883 }
2884
2885 extern __inline __m512
2886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2887 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2888 __m512 __C, const int __R)
2889 {
2890 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2891 (__v16sf) __B,
2892 (__v16sf) __C,
2893 (__mmask16) __U, __R);
2894 }
2895
2896 extern __inline __m512d
2897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2898 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2899 {
2900 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2901 (__v8df) __B,
2902 -(__v8df) __C,
2903 (__mmask8) -1, __R);
2904 }
2905
2906 extern __inline __m512d
2907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2908 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2909 __m512d __C, const int __R)
2910 {
2911 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2912 (__v8df) __B,
2913 -(__v8df) __C,
2914 (__mmask8) __U, __R);
2915 }
2916
2917 extern __inline __m512d
2918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2920 __mmask8 __U, const int __R)
2921 {
2922 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2923 (__v8df) __B,
2924 (__v8df) __C,
2925 (__mmask8) __U, __R);
2926 }
2927
2928 extern __inline __m512d
2929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2931 __m512d __C, const int __R)
2932 {
2933 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2934 (__v8df) __B,
2935 -(__v8df) __C,
2936 (__mmask8) __U, __R);
2937 }
2938
2939 extern __inline __m512
2940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2941 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2942 {
2943 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2944 (__v16sf) __B,
2945 -(__v16sf) __C,
2946 (__mmask16) -1, __R);
2947 }
2948
2949 extern __inline __m512
2950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2951 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2952 __m512 __C, const int __R)
2953 {
2954 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2955 (__v16sf) __B,
2956 -(__v16sf) __C,
2957 (__mmask16) __U, __R);
2958 }
2959
2960 extern __inline __m512
2961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2962 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2963 __mmask16 __U, const int __R)
2964 {
2965 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2966 (__v16sf) __B,
2967 (__v16sf) __C,
2968 (__mmask16) __U, __R);
2969 }
2970
2971 extern __inline __m512
2972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2973 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2974 __m512 __C, const int __R)
2975 {
2976 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2977 (__v16sf) __B,
2978 -(__v16sf) __C,
2979 (__mmask16) __U, __R);
2980 }
2981
2982 extern __inline __m512d
2983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2984 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2985 {
2986 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df) __C,
2989 (__mmask8) -1, __R);
2990 }
2991
2992 extern __inline __m512d
2993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2995 __m512d __C, const int __R)
2996 {
2997 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2998 (__v8df) __B,
2999 (__v8df) __C,
3000 (__mmask8) __U, __R);
3001 }
3002
3003 extern __inline __m512d
3004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3006 __mmask8 __U, const int __R)
3007 {
3008 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3009 (__v8df) __B,
3010 (__v8df) __C,
3011 (__mmask8) __U, __R);
3012 }
3013
3014 extern __inline __m512d
3015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3017 __m512d __C, const int __R)
3018 {
3019 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3020 (__v8df) __B,
3021 (__v8df) __C,
3022 (__mmask8) __U, __R);
3023 }
3024
3025 extern __inline __m512
3026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3027 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3028 {
3029 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3030 (__v16sf) __B,
3031 (__v16sf) __C,
3032 (__mmask16) -1, __R);
3033 }
3034
3035 extern __inline __m512
3036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3038 __m512 __C, const int __R)
3039 {
3040 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3041 (__v16sf) __B,
3042 (__v16sf) __C,
3043 (__mmask16) __U, __R);
3044 }
3045
3046 extern __inline __m512
3047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3049 __mmask16 __U, const int __R)
3050 {
3051 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3052 (__v16sf) __B,
3053 (__v16sf) __C,
3054 (__mmask16) __U, __R);
3055 }
3056
3057 extern __inline __m512
3058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3059 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3060 __m512 __C, const int __R)
3061 {
3062 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3063 (__v16sf) __B,
3064 (__v16sf) __C,
3065 (__mmask16) __U, __R);
3066 }
3067
3068 extern __inline __m512d
3069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3071 {
3072 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3073 (__v8df) __B,
3074 -(__v8df) __C,
3075 (__mmask8) -1, __R);
3076 }
3077
3078 extern __inline __m512d
3079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3081 __m512d __C, const int __R)
3082 {
3083 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3084 (__v8df) __B,
3085 (__v8df) __C,
3086 (__mmask8) __U, __R);
3087 }
3088
3089 extern __inline __m512d
3090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3092 __mmask8 __U, const int __R)
3093 {
3094 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3095 (__v8df) __B,
3096 (__v8df) __C,
3097 (__mmask8) __U, __R);
3098 }
3099
3100 extern __inline __m512d
3101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3103 __m512d __C, const int __R)
3104 {
3105 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3106 (__v8df) __B,
3107 -(__v8df) __C,
3108 (__mmask8) __U, __R);
3109 }
3110
3111 extern __inline __m512
3112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3114 {
3115 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3116 (__v16sf) __B,
3117 -(__v16sf) __C,
3118 (__mmask16) -1, __R);
3119 }
3120
3121 extern __inline __m512
3122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3124 __m512 __C, const int __R)
3125 {
3126 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3127 (__v16sf) __B,
3128 (__v16sf) __C,
3129 (__mmask16) __U, __R);
3130 }
3131
3132 extern __inline __m512
3133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3135 __mmask16 __U, const int __R)
3136 {
3137 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3138 (__v16sf) __B,
3139 (__v16sf) __C,
3140 (__mmask16) __U, __R);
3141 }
3142
3143 extern __inline __m512
3144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3145 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3146 __m512 __C, const int __R)
3147 {
3148 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3149 (__v16sf) __B,
3150 -(__v16sf) __C,
3151 (__mmask16) __U, __R);
3152 }
3153 #else
3154 #define _mm512_fmadd_round_pd(A, B, C, R) \
3155 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3156
3157 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3158 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3159
3160 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3161 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3162
3163 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3164 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3165
3166 #define _mm512_fmadd_round_ps(A, B, C, R) \
3167 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3168
3169 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3170 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3171
3172 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3173 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3174
3175 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3176 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3177
3178 #define _mm512_fmsub_round_pd(A, B, C, R) \
3179 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3180
3181 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3182 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3183
3184 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3185 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3186
3187 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3188 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3189
3190 #define _mm512_fmsub_round_ps(A, B, C, R) \
3191 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3192
3193 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3194 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3195
3196 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3197 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3198
3199 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3200 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3201
3202 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3203 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3204
3205 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3206 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3207
3208 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3209 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3210
3211 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3212 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3213
3214 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3215 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3216
3217 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3218 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3219
3220 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3221 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3222
3223 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3224 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3225
3226 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3227 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3228
3229 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3230 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3231
3232 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3233 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3234
3235 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3236 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3237
3238 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3239 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3240
3241 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3242 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3243
3244 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3245 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3246
3247 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3248 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3249
3250 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3251 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3252
3253 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3254 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3255
3256 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3257 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3258
3259 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3260 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3261
3262 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3263 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3264
3265 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3266 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3267
3268 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3269 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3270
3271 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3272 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3273
3274 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3275 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3276
3277 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3278 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3279
3280 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3281 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3282
3283 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3284 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3285
3286 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3287 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3288
3289 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3290 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3291
3292 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3293 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3294
3295 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3296 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3297 #endif
3298
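/* Packed absolute value of 64-bit (VPABSQ) and 32-bit (VPABSD)
   elements.  */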
3299 extern __inline __m512i
3300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3301 _mm512_abs_epi64 (__m512i __A)
3302 {
3303 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3304 (__v8di)
3305 _mm512_setzero_si512 (),
3306 (__mmask8) -1);
3307 }
3308
3309 extern __inline __m512i
3310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3312 {
3313 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3314 (__v8di) __W,
3315 (__mmask8) __U);
3316 }
3317
3318 extern __inline __m512i
3319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3321 {
3322 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3323 (__v8di)
3324 _mm512_setzero_si512 (),
3325 (__mmask8) __U);
3326 }
3327
3328 extern __inline __m512i
3329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3330 _mm512_abs_epi32 (__m512i __A)
3331 {
3332 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3333 (__v16si)
3334 _mm512_setzero_si512 (),
3335 (__mmask16) -1);
3336 }
3337
3338 extern __inline __m512i
3339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3340 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3341 {
3342 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3343 (__v16si) __W,
3344 (__mmask16) __U);
3345 }
3346
3347 extern __inline __m512i
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3350 {
3351 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3352 (__v16si)
3353 _mm512_setzero_si512 (),
3354 (__mmask16) __U);
3355 }
3356
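/* Broadcasts: replicate the lowest element of the source across the
   512-bit destination.  In the unmasked forms the pass-through
   operand __O is left uninitialized; with an all-ones mask every
   element is written, so its value is never used.  */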
3357 extern __inline __m512
3358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3359 _mm512_broadcastss_ps (__m128 __A)
3360 {
3361 __v16sf __O;
3362 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
3363 (__mmask16) -1);
3364 }
3365
3366 extern __inline __m512
3367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3368 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3369 {
3370 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3371 (__v16sf) __O, __M);
3372 }
3373
3374 extern __inline __m512
3375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3376 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3377 {
3378 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3379 (__v16sf)
3380 _mm512_setzero_ps (),
3381 __M);
3382 }
3383
3384 extern __inline __m512d
3385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3386 _mm512_broadcastsd_pd (__m128d __A)
3387 {
3388 __v8df __O;
3389 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
3390 (__mmask8) -1);
3391 }
3392
3393 extern __inline __m512d
3394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3396 {
3397 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3398 (__v8df) __O, __M);
3399 }
3400
3401 extern __inline __m512d
3402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3403 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3404 {
3405 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3406 (__v8df)
3407 _mm512_setzero_pd (),
3408 __M);
3409 }
3410
3411 extern __inline __m512i
3412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3413 _mm512_broadcastd_epi32 (__m128i __A)
3414 {
3415 __v16si __O;
3416 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
3417 (__mmask16) -1);
3418 }
3419
3420 extern __inline __m512i
3421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3422 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3423 {
3424 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3425 (__v16si) __O, __M);
3426 }
3427
3428 extern __inline __m512i
3429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3430 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3431 {
3432 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3433 (__v16si)
3434 _mm512_setzero_si512 (),
3435 __M);
3436 }
3437
3438 extern __inline __m512i
3439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3440 _mm512_set1_epi32 (int __A)
3441 {
3442 __v16si __O;
3443 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
3444 (__mmask16)(-1));
3445 }
3446
3447 extern __inline __m512i
3448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3450 {
3451 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3452 __M);
3453 }
3454
3455 extern __inline __m512i
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3458 {
3459 return (__m512i)
3460 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3461 (__v16si) _mm512_setzero_si512 (),
3462 __M);
3463 }
3464
3465 extern __inline __m512i
3466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3467 _mm512_broadcastq_epi64 (__m128i __A)
3468 {
3469 __v8di __O;
3470 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
3471 (__mmask8) -1);
3472 }
3473
3474 extern __inline __m512i
3475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3476 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3477 {
3478 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3479 (__v8di) __O, __M);
3480 }
3481
3482 extern __inline __m512i
3483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3484 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3485 {
3486 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3487 (__v8di)
3488 _mm512_setzero_si512 (),
3489 __M);
3490 }
3491
3492 extern __inline __m512i
3493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494 _mm512_set1_epi64 (long long __A)
3495 {
3496 __v8di __O;
3497 #ifdef __x86_64__
3498 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
3499 (__mmask8)(-1));
3500 #else
3501 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
3502 (__mmask8)(-1));
3503 #endif
3504 }
3505
3506 extern __inline __m512i
3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3509 {
3510 #ifdef __x86_64__
3511 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3512 __M);
3513 #else
3514 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
3515 __M);
3516 #endif
3517 }
3518
3519 extern __inline __m512i
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3522 {
3523 #ifdef __x86_64__
3524 return (__m512i)
3525 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3526 (__v8di) _mm512_setzero_si512 (),
3527 __M);
3528 #else
3529 return (__m512i)
3530 __builtin_ia32_pbroadcastq512_mem_mask (__A,
3531 (__v8di) _mm512_setzero_si512 (),
3532 __M);
3533 #endif
3534 }
3535
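/* Usage sketch (illustrative; __ones and __mixed are hypothetical
   caller values): the set1 forms broadcast a scalar taken from a
   general-purpose register (or from memory on 32-bit targets), and the
   masked forms blend with the pass-through operand per element:

     __m512i __ones  = _mm512_set1_epi32 (1);
     __m512i __mixed = _mm512_mask_set1_epi64 (__ones, 0x0F, 42LL);
     // qwords 0-3 of __mixed are 42; qwords 4-7 keep the bits of __ones  */
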
3536 extern __inline __m512
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_broadcast_f32x4 (__m128 __A)
3539 {
3540 __v16sf __O;
3541 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
3542 (__mmask16) -1);
3543 }
3544
3545 extern __inline __m512
3546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3547 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3548 {
3549 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3550 (__v16sf) __O,
3551 __M);
3552 }
3553
3554 extern __inline __m512
3555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3557 {
3558 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3559 (__v16sf)
3560 _mm512_setzero_ps (),
3561 __M);
3562 }
3563
3564 extern __inline __m512i
3565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566 _mm512_broadcast_i32x4 (__m128i __A)
3567 {
3568 __v16si __O;
3569 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3570 __O,
3571 (__mmask16) -1);
3572 }
3573
3574 extern __inline __m512i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3577 {
3578 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3579 (__v16si) __O,
3580 __M);
3581 }
3582
3583 extern __inline __m512i
3584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3586 {
3587 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3588 (__v16si)
3589 _mm512_setzero_si512 (),
3590 __M);
3591 }
3592
3593 extern __inline __m512d
3594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595 _mm512_broadcast_f64x4 (__m256d __A)
3596 {
3597 __v8df __O;
3598 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3599 __O,
3600 (__mmask8) -1);
3601 }
3602
3603 extern __inline __m512d
3604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3605 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3606 {
3607 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3608 (__v8df) __O,
3609 __M);
3610 }
3611
3612 extern __inline __m512d
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3615 {
3616 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3617 (__v8df)
3618 _mm512_setzero_pd (),
3619 __M);
3620 }
3621
3622 extern __inline __m512i
3623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624 _mm512_broadcast_i64x4 (__m256i __A)
3625 {
3626 __v8di __O;
3627 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3628 __O,
3629 (__mmask8) -1);
3630 }
3631
3632 extern __inline __m512i
3633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3634 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3635 {
3636 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3637 (__v8di) __O,
3638 __M);
3639 }
3640
3641 extern __inline __m512i
3642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3644 {
3645 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3646 (__v8di)
3647 _mm512_setzero_si512 (),
3648 __M);
3649 }
3650
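/* Usage sketch (illustrative; __quad and __tile are hypothetical
   caller values): the f32x4, i32x4, f64x4 and i64x4 broadcasts tile a
   whole 128-bit or 256-bit source across the 512-bit result:

     __m128 __quad = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
     __m512 __tile = _mm512_broadcast_f32x4 (__quad); // four copies of __quad  */
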
3651 typedef enum
3652 {
3653 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3654 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3655 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3656 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3657 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3658 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3659 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3660 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3661 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3662 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3663 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3664 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3665 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3666 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3667 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3668 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3669 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3670 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3671 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3672 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3673 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3674 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3675 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3676 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3677 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3678 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3679 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3680 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3681 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3682 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3683 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3684 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3685 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3686 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3687 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3688 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3689 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3690 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3691 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3692 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3693 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3694 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3695 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3696 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3697 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3698 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3699 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3700 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3701 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3702 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3703 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3704 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3705 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3706 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3707 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3708 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3709 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3710 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3711 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3712 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3713 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3714 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3715 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3716 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3717 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3718 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3719 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3720 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3721 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3722 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3723 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3724 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3725 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3726 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3727 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3728 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3729 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3730 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3731 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3732 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3733 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3734 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3735 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3736 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3737 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3738 _MM_PERM_DDDD = 0xFF
3739 } _MM_PERM_ENUM;
3740
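/* The _MM_PERM_* names spell the four 2-bit selectors of a
   PSHUFD-style control byte, most significant position first, with
   A = 0, B = 1, C = 2 and D = 3.  For _mm512_shuffle_epi32,
   _MM_PERM_DCBA (0xE4) leaves the four dwords of each 128-bit lane in
   place, while _MM_PERM_ABCD (0x1B) reverses them within each lane.  */
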
3741 #ifdef __OPTIMIZE__
3742 extern __inline __m512i
3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3745 {
3746 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3747 __mask,
3748 (__v16si)
3749 _mm512_setzero_si512 (),
3750 (__mmask16) -1);
3751 }
3752
3753 extern __inline __m512i
3754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3756 _MM_PERM_ENUM __mask)
3757 {
3758 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3759 __mask,
3760 (__v16si) __W,
3761 (__mmask16) __U);
3762 }
3763
3764 extern __inline __m512i
3765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3766 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3767 {
3768 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3769 __mask,
3770 (__v16si)
3771 _mm512_setzero_si512 (),
3772 (__mmask16) __U);
3773 }
3774
3775 extern __inline __m512i
3776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3777 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3778 {
3779 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3780 (__v8di) __B, __imm,
3781 (__v8di)
3782 _mm512_setzero_si512 (),
3783 (__mmask8) -1);
3784 }
3785
3786 extern __inline __m512i
3787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3789 __m512i __B, const int __imm)
3790 {
3791 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3792 (__v8di) __B, __imm,
3793 (__v8di) __W,
3794 (__mmask8) __U);
3795 }
3796
3797 extern __inline __m512i
3798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3799 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3800 const int __imm)
3801 {
3802 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3803 (__v8di) __B, __imm,
3804 (__v8di)
3805 _mm512_setzero_si512 (),
3806 (__mmask8) __U);
3807 }
3808
3809 extern __inline __m512i
3810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3811 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3812 {
3813 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3814 (__v16si) __B,
3815 __imm,
3816 (__v16si)
3817 _mm512_setzero_si512 (),
3818 (__mmask16) -1);
3819 }
3820
3821 extern __inline __m512i
3822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3823 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3824 __m512i __B, const int __imm)
3825 {
3826 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3827 (__v16si) __B,
3828 __imm,
3829 (__v16si) __W,
3830 (__mmask16) __U);
3831 }
3832
3833 extern __inline __m512i
3834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3835 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3836 const int __imm)
3837 {
3838 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3839 (__v16si) __B,
3840 __imm,
3841 (__v16si)
3842 _mm512_setzero_si512 (),
3843 (__mmask16) __U);
3844 }
3845
3846 extern __inline __m512d
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3849 {
3850 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3851 (__v8df) __B, __imm,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 (__mmask8) -1);
3855 }
3856
3857 extern __inline __m512d
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3860 __m512d __B, const int __imm)
3861 {
3862 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3863 (__v8df) __B, __imm,
3864 (__v8df) __W,
3865 (__mmask8) __U);
3866 }
3867
3868 extern __inline __m512d
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3871 const int __imm)
3872 {
3873 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3874 (__v8df) __B, __imm,
3875 (__v8df)
3876 _mm512_setzero_pd (),
3877 (__mmask8) __U);
3878 }
3879
3880 extern __inline __m512
3881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3882 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3883 {
3884 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3885 (__v16sf) __B, __imm,
3886 (__v16sf)
3887 _mm512_setzero_ps (),
3888 (__mmask16) -1);
3889 }
3890
3891 extern __inline __m512
3892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3893 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3894 __m512 __B, const int __imm)
3895 {
3896 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3897 (__v16sf) __B, __imm,
3898 (__v16sf) __W,
3899 (__mmask16) __U);
3900 }
3901
3902 extern __inline __m512
3903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3904 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
3905 const int __imm)
3906 {
3907 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3908 (__v16sf) __B, __imm,
3909 (__v16sf)
3910 _mm512_setzero_ps (),
3911 (__mmask16) __U);
3912 }
3913
3914 #else
3915 #define _mm512_shuffle_epi32(X, C) \
3916 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3917 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3918 (__mmask16)-1))
3919
3920 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
3921 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3922 (__v16si)(__m512i)(W),\
3923 (__mmask16)(U)))
3924
3925 #define _mm512_maskz_shuffle_epi32(U, X, C) \
3926 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3927 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3928 (__mmask16)(U)))
3929
3930 #define _mm512_shuffle_i64x2(X, Y, C) \
3931 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3932 (__v8di)(__m512i)(Y), (int)(C),\
3933 (__v8di)(__m512i)_mm512_setzero_si512 (),\
3934 (__mmask8)-1))
3935
3936 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
3937 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3938 (__v8di)(__m512i)(Y), (int)(C),\
3939 (__v8di)(__m512i)(W),\
3940 (__mmask8)(U)))
3941
3942 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
3943 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3944 (__v8di)(__m512i)(Y), (int)(C),\
3945 (__v8di)(__m512i)_mm512_setzero_si512 (),\
3946 (__mmask8)(U)))
3947
3948 #define _mm512_shuffle_i32x4(X, Y, C) \
3949 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3950 (__v16si)(__m512i)(Y), (int)(C),\
3951 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3952 (__mmask16)-1))
3953
3954 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
3955 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3956 (__v16si)(__m512i)(Y), (int)(C),\
3957 (__v16si)(__m512i)(W),\
3958 (__mmask16)(U)))
3959
3960 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
3961 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3962 (__v16si)(__m512i)(Y), (int)(C),\
3963 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3964 (__mmask16)(U)))
3965
3966 #define _mm512_shuffle_f64x2(X, Y, C) \
3967 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3968 (__v8df)(__m512d)(Y), (int)(C),\
3969 (__v8df)(__m512d)_mm512_setzero_pd(),\
3970 (__mmask8)-1))
3971
3972 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
3973 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3974 (__v8df)(__m512d)(Y), (int)(C),\
3975 (__v8df)(__m512d)(W),\
3976 (__mmask8)(U)))
3977
3978 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
3979 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3980 (__v8df)(__m512d)(Y), (int)(C),\
3981 (__v8df)(__m512d)_mm512_setzero_pd(),\
3982 (__mmask8)(U)))
3983
3984 #define _mm512_shuffle_f32x4(X, Y, C) \
3985 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3986 (__v16sf)(__m512)(Y), (int)(C),\
3987 (__v16sf)(__m512)_mm512_setzero_ps(),\
3988 (__mmask16)-1))
3989
3990 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
3991 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3992 (__v16sf)(__m512)(Y), (int)(C),\
3993 (__v16sf)(__m512)(W),\
3994 (__mmask16)(U)))
3995
3996 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
3997 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3998 (__v16sf)(__m512)(Y), (int)(C),\
3999 (__v16sf)(__m512)_mm512_setzero_ps(),\
4000 (__mmask16)(U)))
4001 #endif
4002
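/* Usage sketch (illustrative; __v and __lane0 are hypothetical caller
   values): the i32x4/i64x2/f32x4/f64x2 shuffles select whole 128-bit
   lanes, the low two result lanes from the first source and the high
   two from the second.  With both sources equal and control
   _MM_PERM_AAAA, every result lane is lane 0 of the input, i.e. a
   128-bit broadcast:

     __m512i __lane0 = _mm512_shuffle_i32x4 (__v, __v, _MM_PERM_AAAA);  */
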
4003 extern __inline __m512i
4004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4006 {
4007 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4008 (__v16si) __B,
4009 (__v16si)
4010 _mm512_setzero_si512 (),
4011 (__mmask16) -1);
4012 }
4013
4014 extern __inline __m512i
4015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4016 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4017 {
4018 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4019 (__v16si) __B,
4020 (__v16si) __W,
4021 (__mmask16) __U);
4022 }
4023
4024 extern __inline __m512i
4025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4026 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4027 {
4028 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4029 (__v16si) __B,
4030 (__v16si)
4031 _mm512_setzero_si512 (),
4032 (__mmask16) __U);
4033 }
4034
4035 extern __inline __m512i
4036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4037 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4038 {
4039 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4040 (__v16si) __B,
4041 (__v16si)
4042 _mm512_setzero_si512 (),
4043 (__mmask16) -1);
4044 }
4045
4046 extern __inline __m512i
4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4049 {
4050 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4051 (__v16si) __B,
4052 (__v16si) __W,
4053 (__mmask16) __U);
4054 }
4055
4056 extern __inline __m512i
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4059 {
4060 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4061 (__v16si) __B,
4062 (__v16si)
4063 _mm512_setzero_si512 (),
4064 (__mmask16) __U);
4065 }
4066
4067 extern __inline __m512i
4068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4069 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4070 {
4071 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4072 (__v8di) __B,
4073 (__v8di)
4074 _mm512_setzero_si512 (),
4075 (__mmask8) -1);
4076 }
4077
4078 extern __inline __m512i
4079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4080 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4081 {
4082 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4083 (__v8di) __B,
4084 (__v8di) __W,
4085 (__mmask8) __U);
4086 }
4087
4088 extern __inline __m512i
4089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4090 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4091 {
4092 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4093 (__v8di) __B,
4094 (__v8di)
4095 _mm512_setzero_si512 (),
4096 (__mmask8) __U);
4097 }
4098
4099 extern __inline __m512i
4100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4101 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4102 {
4103 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4104 (__v8di) __B,
4105 (__v8di)
4106 _mm512_setzero_si512 (),
4107 (__mmask8) -1);
4108 }
4109
4110 extern __inline __m512i
4111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4112 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4113 {
4114 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4115 (__v8di) __B,
4116 (__v8di) __W,
4117 (__mmask8) __U);
4118 }
4119
4120 extern __inline __m512i
4121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4122 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4123 {
4124 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4125 (__v8di) __B,
4126 (__v8di)
4127 _mm512_setzero_si512 (),
4128 (__mmask8) __U);
4129 }
4130
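/* Usage sketch (illustrative; __x and __rot are hypothetical caller
   values): the rolv/rorv forms rotate each element by the count held
   in the corresponding element of the second operand, e.g. rotating
   every dword left by its own index:

     __m512i __counts = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                          7, 6, 5, 4, 3, 2, 1, 0);
     __m512i __rot    = _mm512_rolv_epi32 (__x, __counts);  */
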
4131 #ifdef __OPTIMIZE__
4132 extern __inline __m256i
4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4135 {
4136 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4137 (__v8si)
4138 _mm256_setzero_si256 (),
4139 (__mmask8) -1, __R);
4140 }
4141
4142 extern __inline __m256i
4143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4144 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4145 const int __R)
4146 {
4147 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4148 (__v8si) __W,
4149 (__mmask8) __U, __R);
4150 }
4151
4152 extern __inline __m256i
4153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4154 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4155 {
4156 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4157 (__v8si)
4158 _mm256_setzero_si256 (),
4159 (__mmask8) __U, __R);
4160 }
4161
4162 extern __inline __m256i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4165 {
4166 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4167 (__v8si)
4168 _mm256_setzero_si256 (),
4169 (__mmask8) -1, __R);
4170 }
4171
4172 extern __inline __m256i
4173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4174 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4175 const int __R)
4176 {
4177 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4178 (__v8si) __W,
4179 (__mmask8) __U, __R);
4180 }
4181
4182 extern __inline __m256i
4183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4185 {
4186 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4187 (__v8si)
4188 _mm256_setzero_si256 (),
4189 (__mmask8) __U, __R);
4190 }
4191 #else
4192 #define _mm512_cvtt_roundpd_epi32(A, B) \
4193 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4194
4195 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4196 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4197
4198 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4199 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4200
4201 #define _mm512_cvtt_roundpd_epu32(A, B) \
4202 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4203
4204 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4205 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4206
4207 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4208 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4209 #endif
4210
4211 #ifdef __OPTIMIZE__
4212 extern __inline __m256i
4213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4215 {
4216 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4217 (__v8si)
4218 _mm256_setzero_si256 (),
4219 (__mmask8) -1, __R);
4220 }
4221
4222 extern __inline __m256i
4223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4225 const int __R)
4226 {
4227 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4228 (__v8si) __W,
4229 (__mmask8) __U, __R);
4230 }
4231
4232 extern __inline __m256i
4233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4235 {
4236 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4237 (__v8si)
4238 _mm256_setzero_si256 (),
4239 (__mmask8) __U, __R);
4240 }
4241
4242 extern __inline __m256i
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4245 {
4246 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4247 (__v8si)
4248 _mm256_setzero_si256 (),
4249 (__mmask8) -1, __R);
4250 }
4251
4252 extern __inline __m256i
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4255 const int __R)
4256 {
4257 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4258 (__v8si) __W,
4259 (__mmask8) __U, __R);
4260 }
4261
4262 extern __inline __m256i
4263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4264 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4265 {
4266 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4267 (__v8si)
4268 _mm256_setzero_si256 (),
4269 (__mmask8) __U, __R);
4270 }
4271 #else
4272 #define _mm512_cvt_roundpd_epi32(A, B) \
4273 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4274
4275 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4276 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4277
4278 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4279 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4280
4281 #define _mm512_cvt_roundpd_epu32(A, B) \
4282 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4283
4284 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4285 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4286
4287 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4288 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4289 #endif
4290
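/* Usage sketch (illustrative; __d and __i are hypothetical caller
   values): the *_round* conversions take one of the _MM_FROUND_*
   selectors defined near the top of this header as their last
   argument; _MM_FROUND_CUR_DIRECTION converts with the rounding mode
   currently set in MXCSR, while the directed modes override it for
   this one instruction:

     __m256i __i = _mm512_cvt_roundpd_epi32 (__d, _MM_FROUND_CUR_DIRECTION);  */
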
4291 #ifdef __OPTIMIZE__
4292 extern __inline __m512i
4293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4295 {
4296 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4297 (__v16si)
4298 _mm512_setzero_si512 (),
4299 (__mmask16) -1, __R);
4300 }
4301
4302 extern __inline __m512i
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4305 const int __R)
4306 {
4307 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4308 (__v16si) __W,
4309 (__mmask16) __U, __R);
4310 }
4311
4312 extern __inline __m512i
4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4315 {
4316 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4317 (__v16si)
4318 _mm512_setzero_si512 (),
4319 (__mmask16) __U, __R);
4320 }
4321
4322 extern __inline __m512i
4323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4324 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4325 {
4326 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4327 (__v16si)
4328 _mm512_setzero_si512 (),
4329 (__mmask16) -1, __R);
4330 }
4331
4332 extern __inline __m512i
4333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4334 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4335 const int __R)
4336 {
4337 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4338 (__v16si) __W,
4339 (__mmask16) __U, __R);
4340 }
4341
4342 extern __inline __m512i
4343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4345 {
4346 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4347 (__v16si)
4348 _mm512_setzero_si512 (),
4349 (__mmask16) __U, __R);
4350 }
4351 #else
4352 #define _mm512_cvtt_roundps_epi32(A, B) \
4353 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4354
4355 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4356 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4357
4358 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4359 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4360
4361 #define _mm512_cvtt_roundps_epu32(A, B) \
4362 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4363
4364 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4365 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4366
4367 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4368 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4369 #endif
4370
4371 #ifdef __OPTIMIZE__
4372 extern __inline __m512i
4373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4375 {
4376 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4377 (__v16si)
4378 _mm512_setzero_si512 (),
4379 (__mmask16) -1, __R);
4380 }
4381
4382 extern __inline __m512i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4385 const int __R)
4386 {
4387 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4388 (__v16si) __W,
4389 (__mmask16) __U, __R);
4390 }
4391
4392 extern __inline __m512i
4393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4394 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4395 {
4396 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4397 (__v16si)
4398 _mm512_setzero_si512 (),
4399 (__mmask16) __U, __R);
4400 }
4401
4402 extern __inline __m512i
4403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4404 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4405 {
4406 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4407 (__v16si)
4408 _mm512_setzero_si512 (),
4409 (__mmask16) -1, __R);
4410 }
4411
4412 extern __inline __m512i
4413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4415 const int __R)
4416 {
4417 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4418 (__v16si) __W,
4419 (__mmask16) __U, __R);
4420 }
4421
4422 extern __inline __m512i
4423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4425 {
4426 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4427 (__v16si)
4428 _mm512_setzero_si512 (),
4429 (__mmask16) __U, __R);
4430 }
4431 #else
4432 #define _mm512_cvt_roundps_epi32(A, B) \
4433 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4434
4435 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4436 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4437
4438 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4439 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4440
4441 #define _mm512_cvt_roundps_epu32(A, B) \
4442 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4443
4444 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4445 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4446
4447 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4448 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4449 #endif
4450
4451 extern __inline __m128d
4452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4454 {
4455 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4456 }
4457
4458 #ifdef __x86_64__
4459 #ifdef __OPTIMIZE__
4460 extern __inline __m128d
4461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4462 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4463 {
4464 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4465 }
4466
4467 extern __inline __m128d
4468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4470 {
4471 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4472 }
4473
4474 extern __inline __m128d
4475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4476 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4477 {
4478 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4479 }
4480 #else
4481 #define _mm_cvt_roundu64_sd(A, B, C) \
4482 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4483
4484 #define _mm_cvt_roundi64_sd(A, B, C) \
4485 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4486
4487 #define _mm_cvt_roundsi64_sd(A, B, C) \
4488 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4489 #endif
4490
4491 #endif
4492
4493 #ifdef __OPTIMIZE__
4494 extern __inline __m128
4495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4496 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4497 {
4498 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4499 }
4500
4501 extern __inline __m128
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4504 {
4505 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4506 }
4507
4508 extern __inline __m128
4509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4510 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4511 {
4512 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4513 }
4514 #else
4515 #define _mm_cvt_roundu32_ss(A, B, C) \
4516 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4517
4518 #define _mm_cvt_roundi32_ss(A, B, C) \
4519 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4520
4521 #define _mm_cvt_roundsi32_ss(A, B, C) \
4522 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4523 #endif
4524
4525 #ifdef __x86_64__
4526 #ifdef __OPTIMIZE__
4527 extern __inline __m128
4528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4529 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4530 {
4531 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4532 }
4533
4534 extern __inline __m128
4535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4537 {
4538 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4539 }
4540
4541 extern __inline __m128
4542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4543 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4544 {
4545 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4546 }
4547 #else
4548 #define _mm_cvt_roundu64_ss(A, B, C) \
4549 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4550
4551 #define _mm_cvt_roundi64_ss(A, B, C) \
4552 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4553
4554 #define _mm_cvt_roundsi64_ss(A, B, C) \
4555 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4556 #endif
4557
4558 #endif
4559
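/* Usage sketch (illustrative; __a and __lo are hypothetical caller
   values): the scalar conversions insert the converted value into the
   low element and copy the remaining elements from the first operand:

     __m128 __lo = _mm_cvt_roundu32_ss (__a, 4000000000u,
                                        _MM_FROUND_CUR_DIRECTION);  */
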
4560 extern __inline __m128i
4561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4562 _mm512_cvtepi32_epi8 (__m512i __A)
4563 {
4564 __v16qi __O;
4565 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O,
4566 (__mmask16) -1);
4567 }
4568
4569 extern __inline void
4570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4572 {
4573 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4574 }
4575
4576 extern __inline __m128i
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4579 {
4580 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4581 (__v16qi) __O, __M);
4582 }
4583
4584 extern __inline __m128i
4585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4586 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4587 {
4588 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4589 (__v16qi)
4590 _mm_setzero_si128 (),
4591 __M);
4592 }
4593
4594 extern __inline __m128i
4595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4596 _mm512_cvtsepi32_epi8 (__m512i __A)
4597 {
4598 __v16qi __O;
4599 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O,
4600 (__mmask16) -1);
4601 }
4602
4603 extern __inline void
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4606 {
4607 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4608 }
4609
4610 extern __inline __m128i
4611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4613 {
4614 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4615 (__v16qi) __O, __M);
4616 }
4617
4618 extern __inline __m128i
4619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4620 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4621 {
4622 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4623 (__v16qi)
4624 _mm_setzero_si128 (),
4625 __M);
4626 }
4627
4628 extern __inline __m128i
4629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630 _mm512_cvtusepi32_epi8 (__m512i __A)
4631 {
4632 __v16qi __O;
4633 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O,
4634 (__mmask16) -1);
4635 }
4636
4637 extern __inline void
4638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4639 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4640 {
4641 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4642 }
4643
4644 extern __inline __m128i
4645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4646 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4647 {
4648 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4649 (__v16qi) __O,
4650 __M);
4651 }
4652
4653 extern __inline __m128i
4654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4655 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4656 {
4657 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4658 (__v16qi)
4659 _mm_setzero_si128 (),
4660 __M);
4661 }
4662
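/* Usage sketch (illustrative; __v is a hypothetical caller value): the
   down-conversions come in truncating (cvtepi32_epi8),
   signed-saturating (cvtsepi32_epi8) and unsigned-saturating
   (cvtusepi32_epi8) flavours, and the *_storeu_* forms write the
   converted elements directly to unaligned memory (16 bytes here):

     unsigned char __buf[16];
     _mm512_mask_cvtepi32_storeu_epi8 (__buf, (__mmask16) -1, __v);  */
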
4663 extern __inline __m256i
4664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 _mm512_cvtepi32_epi16 (__m512i __A)
4666 {
4667 __v16hi __O;
4668 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O,
4669 (__mmask16) -1);
4670 }
4671
4672 extern __inline void
4673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4675 {
4676 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4677 }
4678
4679 extern __inline __m256i
4680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4681 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4682 {
4683 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4684 (__v16hi) __O, __M);
4685 }
4686
4687 extern __inline __m256i
4688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4689 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4690 {
4691 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4692 (__v16hi)
4693 _mm256_setzero_si256 (),
4694 __M);
4695 }
4696
4697 extern __inline __m256i
4698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4699 _mm512_cvtsepi32_epi16 (__m512i __A)
4700 {
4701 __v16hi __O;
4702 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O,
4703 (__mmask16) -1);
4704 }
4705
4706 extern __inline void
4707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4708 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4709 {
4710 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4711 }
4712
4713 extern __inline __m256i
4714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4715 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4716 {
4717 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4718 (__v16hi) __O, __M);
4719 }
4720
4721 extern __inline __m256i
4722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4723 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4724 {
4725 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4726 (__v16hi)
4727 _mm256_setzero_si256 (),
4728 __M);
4729 }
4730
4731 extern __inline __m256i
4732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4733 _mm512_cvtusepi32_epi16 (__m512i __A)
4734 {
4735 __v16hi __O;
4736 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O,
4737 (__mmask16) -1);
4738 }
4739
4740 extern __inline void
4741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4743 {
4744 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4745 }
4746
4747 extern __inline __m256i
4748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4749 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4750 {
4751 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4752 (__v16hi) __O,
4753 __M);
4754 }
4755
4756 extern __inline __m256i
4757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4758 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4759 {
4760 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4761 (__v16hi)
4762 _mm256_setzero_si256 (),
4763 __M);
4764 }
4765
4766 extern __inline __m256i
4767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768 _mm512_cvtepi64_epi32 (__m512i __A)
4769 {
4770 __v8si __O;
4771 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O,
4772 (__mmask8) -1);
4773 }
4774
4775 extern __inline void
4776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4777 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4778 {
4779 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4780 }
4781
4782 extern __inline __m256i
4783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4784 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4785 {
4786 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4787 (__v8si) __O, __M);
4788 }
4789
4790 extern __inline __m256i
4791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4792 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4793 {
4794 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4795 (__v8si)
4796 _mm256_setzero_si256 (),
4797 __M);
4798 }
4799
4800 extern __inline __m256i
4801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4802 _mm512_cvtsepi64_epi32 (__m512i __A)
4803 {
4804 __v8si __O;
4805 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O,
4806 (__mmask8) -1);
4807 }
4808
4809 extern __inline void
4810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4811 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4812 {
4813 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4814 }
4815
4816 extern __inline __m256i
4817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4819 {
4820 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4821 (__v8si) __O, __M);
4822 }
4823
4824 extern __inline __m256i
4825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4826 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4827 {
4828 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4829 (__v8si)
4830 _mm256_setzero_si256 (),
4831 __M);
4832 }
4833
4834 extern __inline __m256i
4835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4836 _mm512_cvtusepi64_epi32 (__m512i __A)
4837 {
4838 __v8si __O;
4839 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O,
4840 (__mmask8) -1);
4841 }
4842
4843 extern __inline void
4844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4845 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4846 {
4847 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4848 }
4849
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4853 {
4854 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4855 (__v8si) __O, __M);
4856 }
4857
4858 extern __inline __m256i
4859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4860 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4861 {
4862 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4863 (__v8si)
4864 _mm256_setzero_si256 (),
4865 __M);
4866 }
4867
4868 extern __inline __m128i
4869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870 _mm512_cvtepi64_epi16 (__m512i __A)
4871 {
4872 __v8hi __O;
4873 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O,
4874 (__mmask8) -1);
4875 }
4876
4877 extern __inline void
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4880 {
4881 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4882 }
4883
4884 extern __inline __m128i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4887 {
4888 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4889 (__v8hi) __O, __M);
4890 }
4891
4892 extern __inline __m128i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
4895 {
4896 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4897 (__v8hi)
4898 _mm_setzero_si128 (),
4899 __M);
4900 }
4901
4902 extern __inline __m128i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_cvtsepi64_epi16 (__m512i __A)
4905 {
4906 __v8hi __O;
4907 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O,
4908 (__mmask8) -1);
4909 }
4910
4911 extern __inline void
4912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4913 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
4914 {
4915 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4916 }
4917
4918 extern __inline __m128i
4919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4920 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4921 {
4922 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4923 (__v8hi) __O, __M);
4924 }
4925
4926 extern __inline __m128i
4927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4928 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
4929 {
4930 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4931 (__v8hi)
4932 _mm_setzero_si128 (),
4933 __M);
4934 }
4935
4936 extern __inline __m128i
4937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4938 _mm512_cvtusepi64_epi16 (__m512i __A)
4939 {
4940 __v8hi __O;
4941 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O,
4942 (__mmask8) -1);
4943 }
4944
4945 extern __inline void
4946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4947 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4948 {
4949 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
4950 }
4951
4952 extern __inline __m128i
4953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4955 {
4956 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4957 (__v8hi) __O, __M);
4958 }
4959
4960 extern __inline __m128i
4961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4962 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
4963 {
4964 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4965 (__v8hi)
4966 _mm_setzero_si128 (),
4967 __M);
4968 }
4969
4970 extern __inline __m128i
4971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4972 _mm512_cvtepi64_epi8 (__m512i __A)
4973 {
4974 __v16qi __O;
4975 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O,
4976 (__mmask8) -1);
4977 }
4978
4979 extern __inline void
4980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4981 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
4982 {
4983 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
4984 }
4985
4986 extern __inline __m128i
4987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4988 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
4989 {
4990 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4991 (__v16qi) __O, __M);
4992 }
4993
4994 extern __inline __m128i
4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
4997 {
4998 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4999 (__v16qi)
5000 _mm_setzero_si128 (),
5001 __M);
5002 }
5003
5004 extern __inline __m128i
5005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006 _mm512_cvtsepi64_epi8 (__m512i __A)
5007 {
5008 __v16qi __O;
5009 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O,
5010 (__mmask8) -1);
5011 }
5012
5013 extern __inline void
5014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5016 {
5017 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5018 }
5019
5020 extern __inline __m128i
5021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5022 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5023 {
5024 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5025 (__v16qi) __O, __M);
5026 }
5027
5028 extern __inline __m128i
5029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5030 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5031 {
5032 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5033 (__v16qi)
5034 _mm_setzero_si128 (),
5035 __M);
5036 }
5037
5038 extern __inline __m128i
5039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5040 _mm512_cvtusepi64_epi8 (__m512i __A)
5041 {
5042 __v16qi __O;
5043 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O,
5044 (__mmask8) -1);
5045 }
5046
5047 extern __inline void
5048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5049 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5050 {
5051 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5052 }
5053
5054 extern __inline __m128i
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5057 {
5058 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5059 (__v16qi) __O,
5060 __M);
5061 }
5062
5063 extern __inline __m128i
5064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5065 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5066 {
5067 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5068 (__v16qi)
5069 _mm_setzero_si128 (),
5070 __M);
5071 }
5072
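/* Widen eight packed 32-bit integers to double precision: VCVTDQ2PD for
   signed and VCVTUDQ2PD for unsigned elements.  */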
5073 extern __inline __m512d
5074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5075 _mm512_cvtepi32_pd (__m256i __A)
5076 {
5077 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5078 (__v8df)
5079 _mm512_setzero_pd (),
5080 (__mmask8) -1);
5081 }
5082
5083 extern __inline __m512d
5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5086 {
5087 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5088 (__v8df) __W,
5089 (__mmask8) __U);
5090 }
5091
5092 extern __inline __m512d
5093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5095 {
5096 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5097 (__v8df)
5098 _mm512_setzero_pd (),
5099 (__mmask8) __U);
5100 }
5101
5102 extern __inline __m512d
5103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5104 _mm512_cvtepu32_pd (__m256i __A)
5105 {
5106 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5107 (__v8df)
5108 _mm512_setzero_pd (),
5109 (__mmask8) -1);
5110 }
5111
5112 extern __inline __m512d
5113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5115 {
5116 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5117 (__v8df) __W,
5118 (__mmask8) __U);
5119 }
5120
5121 extern __inline __m512d
5122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5124 {
5125 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5126 (__v8df)
5127 _mm512_setzero_pd (),
5128 (__mmask8) __U);
5129 }
5130
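/* Conversions from packed 32-bit integers to single precision with an
   explicit rounding mode (VCVTDQ2PS, VCVTUDQ2PS).  The rounding argument
   must reach the builtin as a compile-time constant, which only happens
   when compiling with optimization, so macro forms are provided for the
   non-__OPTIMIZE__ case.

   Illustrative sketch (the vector __a is hypothetical):

     __m512 __r = _mm512_cvt_roundepi32_ps (__a, _MM_FROUND_CUR_DIRECTION);
*/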
5131 #ifdef __OPTIMIZE__
5132 extern __inline __m512
5133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5134 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5135 {
5136 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5137 (__v16sf)
5138 _mm512_setzero_ps (),
5139 (__mmask16) -1, __R);
5140 }
5141
5142 extern __inline __m512
5143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5144 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5145 const int __R)
5146 {
5147 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5148 (__v16sf) __W,
5149 (__mmask16) __U, __R);
5150 }
5151
5152 extern __inline __m512
5153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5155 {
5156 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5157 (__v16sf)
5158 _mm512_setzero_ps (),
5159 (__mmask16) __U, __R);
5160 }
5161
5162 extern __inline __m512
5163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5164 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5165 {
5166 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5167 (__v16sf)
5168 _mm512_setzero_ps (),
5169 (__mmask16) -1, __R);
5170 }
5171
5172 extern __inline __m512
5173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5174 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5175 const int __R)
5176 {
5177 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5178 (__v16sf) __W,
5179 (__mmask16) __U, __R);
5180 }
5181
5182 extern __inline __m512
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5185 {
5186 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5187 (__v16sf)
5188 _mm512_setzero_ps (),
5189 (__mmask16) __U, __R);
5190 }
5191
5192 #else
5193 #define _mm512_cvt_roundepi32_ps(A, B) \
5194 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
5195
5196 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5197 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)(W), U, B)
5198
5199 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5200 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5201
5202 #define _mm512_cvt_roundepu32_ps(A, B) \
5203 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
5204
5205 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5206 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)(W), U, B)
5207
5208 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5209 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5210 #endif
5211
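/* Extract the 256-bit or 128-bit lane selected by the constant __imm:
   VEXTRACTF64X4, VEXTRACTF32X4, VEXTRACTI64X4 and VEXTRACTI32X4.  */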
5212 #ifdef __OPTIMIZE__
5213 extern __inline __m256d
5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5216 {
5217 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5218 __imm,
5219 (__v4df)
5220 _mm256_setzero_pd (),
5221 (__mmask8) -1);
5222 }
5223
5224 extern __inline __m256d
5225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5226 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5227 const int __imm)
5228 {
5229 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5230 __imm,
5231 (__v4df) __W,
5232 (__mmask8) __U);
5233 }
5234
5235 extern __inline __m256d
5236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5237 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5238 {
5239 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5240 __imm,
5241 (__v4df)
5242 _mm256_setzero_pd (),
5243 (__mmask8) __U);
5244 }
5245
5246 extern __inline __m128
5247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5248 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5249 {
5250 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5251 __imm,
5252 (__v4sf)
5253 _mm_setzero_ps (),
5254 (__mmask8) -1);
5255 }
5256
5257 extern __inline __m128
5258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5259 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5260 const int __imm)
5261 {
5262 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5263 __imm,
5264 (__v4sf) __W,
5265 (__mmask8) __U);
5266 }
5267
5268 extern __inline __m128
5269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5270 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5271 {
5272 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5273 __imm,
5274 (__v4sf)
5275 _mm_setzero_ps (),
5276 (__mmask8) __U);
5277 }
5278
5279 extern __inline __m256i
5280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5281 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5282 {
5283 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5284 __imm,
5285 (__v4di)
5286 _mm256_setzero_si256 (),
5287 (__mmask8) -1);
5288 }
5289
5290 extern __inline __m256i
5291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5292 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5293 const int __imm)
5294 {
5295 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5296 __imm,
5297 (__v4di) __W,
5298 (__mmask8) __U);
5299 }
5300
5301 extern __inline __m256i
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5304 {
5305 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5306 __imm,
5307 (__v4di)
5308 _mm256_setzero_si256 (),
5309 (__mmask8) __U);
5310 }
5311
5312 extern __inline __m128i
5313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5314 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5315 {
5316 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5317 __imm,
5318 (__v4si)
5319 _mm_setzero_si128 (),
5320 (__mmask8) -1);
5321 }
5322
5323 extern __inline __m128i
5324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5325 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5326 const int __imm)
5327 {
5328 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5329 __imm,
5330 (__v4si) __W,
5331 (__mmask8) __U);
5332 }
5333
5334 extern __inline __m128i
5335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5336 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5337 {
5338 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5339 __imm,
5340 (__v4si)
5341 _mm_setzero_si128 (),
5342 (__mmask8) __U);
5343 }
5344 #else
5345
5346 #define _mm512_extractf64x4_pd(X, C) \
5347 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5348 (int) (C),\
5349 (__v4df)(__m256d)_mm256_setzero_pd(),\
5350 (__mmask8)-1))
5351
5352 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5353 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5354 (int) (C),\
5355 (__v4df)(__m256d)(W),\
5356 (__mmask8)(U)))
5357
5358 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5359 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5360 (int) (C),\
5361 (__v4df)(__m256d)_mm256_setzero_pd(),\
5362 (__mmask8)(U)))
5363
5364 #define _mm512_extractf32x4_ps(X, C) \
5365 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5366 (int) (C),\
5367 (__v4sf)(__m128)_mm_setzero_ps(),\
5368 (__mmask8)-1))
5369
5370 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5371 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5372 (int) (C),\
5373 (__v4sf)(__m128)(W),\
5374 (__mmask8)(U)))
5375
5376 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5377 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5378 (int) (C),\
5379 (__v4sf)(__m128)_mm_setzero_ps(),\
5380 (__mmask8)(U)))
5381
5382 #define _mm512_extracti64x4_epi64(X, C) \
5383 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5384 (int) (C),\
5385 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5386 (__mmask8)-1))
5387
5388 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5389 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5390 (int) (C),\
5391 (__v4di)(__m256i)(W),\
5392 (__mmask8)(U)))
5393
5394 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5395 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5396 (int) (C),\
5397 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5398 (__mmask8)(U)))
5399
5400 #define _mm512_extracti32x4_epi32(X, C) \
5401 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5402 (int) (C),\
5403 (__v4si)(__m128i)_mm_setzero_si128 (),\
5404 (__mmask8)-1))
5405
5406 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5407 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5408 (int) (C),\
5409 (__v4si)(__m128i)(W),\
5410 (__mmask8)(U)))
5411
5412 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5413 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5414 (int) (C),\
5415 (__v4si)(__m128i)_mm_setzero_si128 (),\
5416 (__mmask8)(U)))
5417 #endif
5418
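/* Insert a 128-bit or 256-bit lane at the constant position __imm:
   VINSERTI32X4, VINSERTF32X4, VINSERTI64X4 and VINSERTF64X4.  */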
5419 #ifdef __OPTIMIZE__
5420 extern __inline __m512i
5421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5423 {
5424 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5425 (__v4si) __B,
5426 __imm,
5427 (__v16si) __A, -1);
5428 }
5429
5430 extern __inline __m512
5431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5433 {
5434 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5435 (__v4sf) __B,
5436 __imm,
5437 (__v16sf) __A, -1);
5438 }
5439
5440 extern __inline __m512i
5441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5442 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5443 {
5444 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5445 (__v4di) __B,
5446 __imm,
5447 (__v8di)
5448 _mm512_setzero_si512 (),
5449 (__mmask8) -1);
5450 }
5451
5452 extern __inline __m512i
5453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5454 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5455 __m256i __B, const int __imm)
5456 {
5457 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5458 (__v4di) __B,
5459 __imm,
5460 (__v8di) __W,
5461 (__mmask8) __U);
5462 }
5463
5464 extern __inline __m512i
5465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5467 const int __imm)
5468 {
5469 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5470 (__v4di) __B,
5471 __imm,
5472 (__v8di)
5473 _mm512_setzero_si512 (),
5474 (__mmask8) __U);
5475 }
5476
5477 extern __inline __m512d
5478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5479 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5480 {
5481 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5482 (__v4df) __B,
5483 __imm,
5484 (__v8df)
5485 _mm512_setzero_pd (),
5486 (__mmask8) -1);
5487 }
5488
5489 extern __inline __m512d
5490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5491 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5492 __m256d __B, const int __imm)
5493 {
5494 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5495 (__v4df) __B,
5496 __imm,
5497 (__v8df) __W,
5498 (__mmask8) __U);
5499 }
5500
5501 extern __inline __m512d
5502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5504 const int __imm)
5505 {
5506 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5507 (__v4df) __B,
5508 __imm,
5509 (__v8df)
5510 _mm512_setzero_pd (),
5511 (__mmask8) __U);
5512 }
5513 #else
5514 #define _mm512_insertf32x4(X, Y, C) \
5515 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5516 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5517
5518 #define _mm512_inserti32x4(X, Y, C) \
5519 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5520 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5521
5522 #define _mm512_insertf64x4(X, Y, C) \
5523 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5524 (__v4df)(__m256d) (Y), (int) (C), \
5525 (__v8df)(__m512d)_mm512_setzero_pd(), \
5526 (__mmask8)-1))
5527
5528 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5529 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5530 (__v4df)(__m256d) (Y), (int) (C), \
5531 (__v8df)(__m512d)(W), \
5532 (__mmask8)(U)))
5533
5534 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5535 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5536 (__v4df)(__m256d) (Y), (int) (C), \
5537 (__v8df)(__m512d)_mm512_setzero_pd(), \
5538 (__mmask8)(U)))
5539
5540 #define _mm512_inserti64x4(X, Y, C) \
5541 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5542 (__v4di)(__m256i) (Y), (int) (C), \
5543 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5544 (__mmask8)-1))
5545
5546 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5547 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5548 (__v4di)(__m256i) (Y), (int) (C),\
5549 (__v8di)(__m512i)(W),\
5550 (__mmask8)(U)))
5551
5552 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
5553 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5554 (__v4di)(__m256i) (Y), (int) (C), \
5555 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5556 (__mmask8)(U)))
5557 #endif
5558
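/* Unaligned 512-bit loads and stores (VMOVUPD, VMOVUPS, VMOVDQU64,
   VMOVDQU32).  The _mask_ loads take masked-off elements from __W, the
   _maskz_ loads zero them, and the masked stores leave masked-off memory
   untouched.

   Illustrative sketch (the names __n and __src are hypothetical,
   0 <= __n <= 16): load the first __n floats of an array, zeroing the
   remaining vector elements:

     __mmask16 __k = (__mmask16) ((1 << __n) - 1);
     __m512 __v = _mm512_maskz_loadu_ps (__k, __src);
*/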
5559 extern __inline __m512d
5560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561 _mm512_loadu_pd (void const *__P)
5562 {
5563 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5564 (__v8df)
5565 _mm512_setzero_pd (),
5566 (__mmask8) -1);
5567 }
5568
5569 extern __inline __m512d
5570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5571 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5572 {
5573 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5574 (__v8df) __W,
5575 (__mmask8) __U);
5576 }
5577
5578 extern __inline __m512d
5579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5580 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5581 {
5582 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5583 (__v8df)
5584 _mm512_setzero_pd (),
5585 (__mmask8) __U);
5586 }
5587
5588 extern __inline void
5589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590 _mm512_storeu_pd (void *__P, __m512d __A)
5591 {
5592 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5593 (__mmask8) -1);
5594 }
5595
5596 extern __inline void
5597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5598 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5599 {
5600 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5601 (__mmask8) __U);
5602 }
5603
5604 extern __inline __m512
5605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5606 _mm512_loadu_ps (void const *__P)
5607 {
5608 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5609 (__v16sf)
5610 _mm512_setzero_ps (),
5611 (__mmask16) -1);
5612 }
5613
5614 extern __inline __m512
5615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5616 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5617 {
5618 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5619 (__v16sf) __W,
5620 (__mmask16) __U);
5621 }
5622
5623 extern __inline __m512
5624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5625 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5626 {
5627 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5628 (__v16sf)
5629 _mm512_setzero_ps (),
5630 (__mmask16) __U);
5631 }
5632
5633 extern __inline void
5634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5635 _mm512_storeu_ps (void *__P, __m512 __A)
5636 {
5637 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5638 (__mmask16) -1);
5639 }
5640
5641 extern __inline void
5642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5643 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5644 {
5645 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5646 (__mmask16) __U);
5647 }
5648
5649 extern __inline __m512i
5650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5651 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5652 {
5653 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5654 (__v8di) __W,
5655 (__mmask8) __U);
5656 }
5657
5658 extern __inline __m512i
5659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5660 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5661 {
5662 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5663 (__v8di)
5664 _mm512_setzero_si512 (),
5665 (__mmask8) __U);
5666 }
5667
5668 extern __inline void
5669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5671 {
5672 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5673 (__mmask8) __U);
5674 }
5675
5676 extern __inline void
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm512_storeu_epi64 (void *__P, __m512i __A)
5679 {
5680 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5681 (__mmask8) -1);
5682 }
5683
5684 extern __inline __m512i
5685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5686 _mm512_loadu_epi32 (void const *__P)
5687 {
5688 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5689 (__v16si)
5690 _mm512_setzero_si512 (),
5691 (__mmask16) -1);
5692 }
5693
5694 extern __inline __m512i
5695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5697 {
5698 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5699 (__v16si) __W,
5700 (__mmask16) __U);
5701 }
5702
5703 extern __inline __m512i
5704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5706 {
5707 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5708 (__v16si)
5709 _mm512_setzero_si512 (),
5710 (__mmask16) __U);
5711 }
5712
5713 extern __inline void
5714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5715 _mm512_storeu_epi32 (void *__P, __m512i __A)
5716 {
5717 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5718 (__mmask16) -1);
5719 }
5720
5721 extern __inline void
5722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5723 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5724 {
5725 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5726 (__mmask16) __U);
5727 }
5728
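/* Permute double and single precision elements within 128-bit lanes
   using a variable control vector (VPERMILPD, VPERMILPS).  */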
5729 extern __inline __m512d
5730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5731 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5732 {
5733 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5734 (__v8di) __C,
5735 (__v8df)
5736 _mm512_setzero_pd (),
5737 (__mmask8) -1);
5738 }
5739
5740 extern __inline __m512d
5741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5742 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5743 {
5744 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5745 (__v8di) __C,
5746 (__v8df) __W,
5747 (__mmask8) __U);
5748 }
5749
5750 extern __inline __m512d
5751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5753 {
5754 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5755 (__v8di) __C,
5756 (__v8df)
5757 _mm512_setzero_pd (),
5758 (__mmask8) __U);
5759 }
5760
5761 extern __inline __m512
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5764 {
5765 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5766 (__v16si) __C,
5767 (__v16sf)
5768 _mm512_setzero_ps (),
5769 (__mmask16) -1);
5770 }
5771
5772 extern __inline __m512
5773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5775 {
5776 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5777 (__v16si) __C,
5778 (__v16sf) __W,
5779 (__mmask16) __U);
5780 }
5781
5782 extern __inline __m512
5783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5784 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5785 {
5786 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5787 (__v16si) __C,
5788 (__v16sf)
5789 _mm512_setzero_ps (),
5790 (__mmask16) __U);
5791 }
5792
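/* Full two-source permutes (VPERMT2Q, VPERMT2D, VPERMT2PD, VPERMT2PS and
   the VPERMI2* forms): each index in __I selects an element from the
   concatenation of __A and __B.  Note that the builtins take the index
   operand first.  In the _mask_ forms masked-off elements keep __A,
   while in the _mask2_ forms they keep the index operand __I.  */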
5793 extern __inline __m512i
5794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5795 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5796 {
5797 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5798 /* idx */ ,
5799 (__v8di) __A,
5800 (__v8di) __B,
5801 (__mmask8) -1);
5802 }
5803
5804 extern __inline __m512i
5805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5806 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5807 __m512i __B)
5808 {
5809 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5810 /* idx */ ,
5811 (__v8di) __A,
5812 (__v8di) __B,
5813 (__mmask8) __U);
5814 }
5815
5816 extern __inline __m512i
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5819 __mmask8 __U, __m512i __B)
5820 {
5821 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5822 (__v8di) __I
5823 /* idx */ ,
5824 (__v8di) __B,
5825 (__mmask8) __U);
5826 }
5827
5828 extern __inline __m512i
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5831 __m512i __I, __m512i __B)
5832 {
5833 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5834 /* idx */ ,
5835 (__v8di) __A,
5836 (__v8di) __B,
5837 (__mmask8) __U);
5838 }
5839
5840 extern __inline __m512i
5841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5842 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5843 {
5844 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5845 /* idx */ ,
5846 (__v16si) __A,
5847 (__v16si) __B,
5848 (__mmask16) -1);
5849 }
5850
5851 extern __inline __m512i
5852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5853 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5854 __m512i __I, __m512i __B)
5855 {
5856 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5857 /* idx */ ,
5858 (__v16si) __A,
5859 (__v16si) __B,
5860 (__mmask16) __U);
5861 }
5862
5863 extern __inline __m512i
5864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5866 __mmask16 __U, __m512i __B)
5867 {
5868 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5869 (__v16si) __I
5870 /* idx */ ,
5871 (__v16si) __B,
5872 (__mmask16) __U);
5873 }
5874
5875 extern __inline __m512i
5876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5878 __m512i __I, __m512i __B)
5879 {
5880 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5881 /* idx */ ,
5882 (__v16si) __A,
5883 (__v16si) __B,
5884 (__mmask16) __U);
5885 }
5886
5887 extern __inline __m512d
5888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5890 {
5891 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5892 /* idx */ ,
5893 (__v8df) __A,
5894 (__v8df) __B,
5895 (__mmask8) -1);
5896 }
5897
5898 extern __inline __m512d
5899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5900 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
5901 __m512d __B)
5902 {
5903 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5904 /* idx */ ,
5905 (__v8df) __A,
5906 (__v8df) __B,
5907 (__mmask8) __U);
5908 }
5909
5910 extern __inline __m512d
5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
5913 __m512d __B)
5914 {
5915 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
5916 (__v8di) __I
5917 /* idx */ ,
5918 (__v8df) __B,
5919 (__mmask8) __U);
5920 }
5921
5922 extern __inline __m512d
5923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5924 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
5925 __m512d __B)
5926 {
5927 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
5928 /* idx */ ,
5929 (__v8df) __A,
5930 (__v8df) __B,
5931 (__mmask8) __U);
5932 }
5933
5934 extern __inline __m512
5935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5936 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
5937 {
5938 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5939 /* idx */ ,
5940 (__v16sf) __A,
5941 (__v16sf) __B,
5942 (__mmask16) -1);
5943 }
5944
5945 extern __inline __m512
5946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5947 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
5948 {
5949 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5950 /* idx */ ,
5951 (__v16sf) __A,
5952 (__v16sf) __B,
5953 (__mmask16) __U);
5954 }
5955
5956 extern __inline __m512
5957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
5959 __m512 __B)
5960 {
5961 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
5962 (__v16si) __I
5963 /* idx */ ,
5964 (__v16sf) __B,
5965 (__mmask16) __U);
5966 }
5967
5968 extern __inline __m512
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
5971 __m512 __B)
5972 {
5973 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
5974 /* idx */ ,
5975 (__v16sf) __A,
5976 (__v16sf) __B,
5977 (__mmask16) __U);
5978 }
5979
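/* Lane-local permutes with an 8-bit immediate control (VPERMILPD and
   VPERMILPS, immediate forms).  */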
5980 #ifdef __OPTIMIZE__
5981 extern __inline __m512d
5982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5983 _mm512_permute_pd (__m512d __X, const int __C)
5984 {
5985 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5986 (__v8df)
5987 _mm512_setzero_pd (),
5988 (__mmask8) -1);
5989 }
5990
5991 extern __inline __m512d
5992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5993 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
5994 {
5995 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5996 (__v8df) __W,
5997 (__mmask8) __U);
5998 }
5999
6000 extern __inline __m512d
6001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6002 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6003 {
6004 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6005 (__v8df)
6006 _mm512_setzero_pd (),
6007 (__mmask8) __U);
6008 }
6009
6010 extern __inline __m512
6011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6012 _mm512_permute_ps (__m512 __X, const int __C)
6013 {
6014 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6015 (__v16sf)
6016 _mm512_setzero_ps (),
6017 (__mmask16) -1);
6018 }
6019
6020 extern __inline __m512
6021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6022 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6023 {
6024 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6025 (__v16sf) __W,
6026 (__mmask16) __U);
6027 }
6028
6029 extern __inline __m512
6030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6031 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6032 {
6033 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6034 (__v16sf)
6035 _mm512_setzero_ps (),
6036 (__mmask16) __U);
6037 }
6038 #else
6039 #define _mm512_permute_pd(X, C) \
6040 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6041 (__v8df)(__m512d)(X), \
6042 (__mmask8)(-1)))
6043
6044 #define _mm512_mask_permute_pd(W, U, X, C) \
6045 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6046 (__v8df)(__m512d)(W), \
6047 (__mmask8)(U)))
6048
6049 #define _mm512_maskz_permute_pd(U, X, C) \
6050 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6051 (__v8df)(__m512d)_mm512_setzero_pd(), \
6052 (__mmask8)(U)))
6053
6054 #define _mm512_permute_ps(X, C) \
6055 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6056 (__v16sf)(__m512)(X), \
6057 (__mmask16)(-1)))
6058
6059 #define _mm512_mask_permute_ps(W, U, X, C) \
6060 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6061 (__v16sf)(__m512)(W), \
6062 (__mmask16)(U)))
6063
6064 #define _mm512_maskz_permute_ps(U, X, C) \
6065 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6066 (__v16sf)(__m512)_mm512_setzero_ps(), \
6067 (__mmask16)(U)))
6068 #endif
6069
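/* Cross-lane permutes of 64-bit elements selected by an immediate
   (VPERMQ, VPERMPD).  */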
6070 #ifdef __OPTIMIZE__
6071 extern __inline __m512i
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm512_permutex_epi64 (__m512i __X, const int __I)
6074 {
6075 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6076 (__v8di)
6077 _mm512_setzero_si512 (),
6078 (__mmask8) (-1));
6079 }
6080
6081 extern __inline __m512i
6082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6083 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6084 __m512i __X, const int __I)
6085 {
6086 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6087 (__v8di) __W,
6088 (__mmask8) __M);
6089 }
6090
6091 extern __inline __m512i
6092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6093 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6094 {
6095 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6096 (__v8di)
6097 _mm512_setzero_si512 (),
6098 (__mmask8) __M);
6099 }
6100
6101 extern __inline __m512d
6102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6103 _mm512_permutex_pd (__m512d __X, const int __M)
6104 {
6105 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6106 (__v8df)
6107 _mm512_setzero_pd (),
6108 (__mmask8) -1);
6109 }
6110
6111 extern __inline __m512d
6112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6114 {
6115 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6116 (__v8df) __W,
6117 (__mmask8) __U);
6118 }
6119
6120 extern __inline __m512d
6121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6122 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6123 {
6124 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6125 (__v8df)
6126 _mm512_setzero_pd (),
6127 (__mmask8) __U);
6128 }
6129 #else
6130 #define _mm512_permutex_pd(X, M) \
6131 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6132 (__v8df)(__m512d)(X), (__mmask8)-1))
6133
6134 #define _mm512_mask_permutex_pd(W, U, X, M) \
6135 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6136 (__v8df)(__m512d)(W), (__mmask8)(U)))
6137
6138 #define _mm512_maskz_permutex_pd(U, X, M) \
6139 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6140 (__v8df)(__m512d)_mm512_setzero_pd(),\
6141 (__mmask8)(U)))
6142
6143 #define _mm512_permutex_epi64(X, I) \
6144 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6145 (int)(I), \
6146 (__v8di)(__m512i)(X), \
6147 (__mmask8)(-1)))
6148
6149 #define _mm512_maskz_permutex_epi64(M, X, I) \
6150 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6151 (int)(I), \
6152 (__v8di)(__m512i) \
6153 (_mm512_setzero_si512 ()),\
6154 (__mmask8)(M)))
6155
6156 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6157 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6158 (int)(I), \
6159 (__v8di)(__m512i)(W), \
6160 (__mmask8)(M)))
6161 #endif
6162
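/* Cross-lane permutes controlled by an index vector (VPERMQ, VPERMD,
   VPERMPD, VPERMPS with a vector operand).

   Illustrative sketch (the vector __a is hypothetical): reverse the
   sixteen single-precision elements of __a:

     __m512i __rev = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                       8, 9, 10, 11, 12, 13, 14, 15);
     __m512 __r = _mm512_permutexvar_ps (__rev, __a);
*/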
6163 extern __inline __m512i
6164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6165 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6166 {
6167 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6168 (__v8di) __Y,
6169 (__v8di)
6170 _mm512_setzero_si512 (),
6171 __M);
6172 }
6173
6174 extern __inline __m512i
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6177 {
6178 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6179 (__v8di) __Y,
6180 (__v8di)
6181 _mm512_setzero_si512 (),
6182 (__mmask8) -1);
6183 }
6184
6185 extern __inline __m512i
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6188 __m512i __Y)
6189 {
6190 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6191 (__v8di) __Y,
6192 (__v8di) __W,
6193 __M);
6194 }
6195
6196 extern __inline __m512i
6197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6199 {
6200 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6201 (__v16si) __Y,
6202 (__v16si)
6203 _mm512_setzero_si512 (),
6204 __M);
6205 }
6206
6207 extern __inline __m512i
6208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6210 {
6211 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6212 (__v16si) __Y,
6213 (__v16si)
6214 _mm512_setzero_si512 (),
6215 (__mmask16) -1);
6216 }
6217
6218 extern __inline __m512i
6219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6220 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6221 __m512i __Y)
6222 {
6223 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6224 (__v16si) __Y,
6225 (__v16si) __W,
6226 __M);
6227 }
6228
6229 extern __inline __m512d
6230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6231 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6232 {
6233 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6234 (__v8di) __X,
6235 (__v8df)
6236 _mm512_setzero_pd (),
6237 (__mmask8) -1);
6238 }
6239
6240 extern __inline __m512d
6241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6242 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6243 {
6244 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6245 (__v8di) __X,
6246 (__v8df) __W,
6247 (__mmask8) __U);
6248 }
6249
6250 extern __inline __m512d
6251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6252 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6253 {
6254 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6255 (__v8di) __X,
6256 (__v8df)
6257 _mm512_setzero_pd (),
6258 (__mmask8) __U);
6259 }
6260
6261 extern __inline __m512
6262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6264 {
6265 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6266 (__v16si) __X,
6267 (__v16sf)
6268 _mm512_setzero_ps (),
6269 (__mmask16) -1);
6270 }
6271
6272 extern __inline __m512
6273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6275 {
6276 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6277 (__v16si) __X,
6278 (__v16sf) __W,
6279 (__mmask16) __U);
6280 }
6281
6282 extern __inline __m512
6283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6284 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6285 {
6286 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6287 (__v16si) __X,
6288 (__v16sf)
6289 _mm512_setzero_ps (),
6290 (__mmask16) __U);
6291 }
6292
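/* Immediate-controlled shuffles (VSHUFPS, VSHUFPD) and the VFIXUPIMM
   family, which fixes up special floating-point values (NaNs, zeros,
   infinities) using per-element lookup tables held in the integer
   operand __C; the _round_ forms take an explicit SAE/rounding
   argument __R.  */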
6293 #ifdef __OPTIMIZE__
6294 extern __inline __m512
6295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6296 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6297 {
6298 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6299 (__v16sf) __V, __imm,
6300 (__v16sf)
6301 _mm512_setzero_ps (),
6302 (__mmask16) -1);
6303 }
6304
6305 extern __inline __m512
6306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6307 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6308 __m512 __V, const int __imm)
6309 {
6310 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6311 (__v16sf) __V, __imm,
6312 (__v16sf) __W,
6313 (__mmask16) __U);
6314 }
6315
6316 extern __inline __m512
6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6319 {
6320 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6321 (__v16sf) __V, __imm,
6322 (__v16sf)
6323 _mm512_setzero_ps (),
6324 (__mmask16) __U);
6325 }
6326
6327 extern __inline __m512d
6328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6329 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6330 {
6331 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6332 (__v8df) __V, __imm,
6333 (__v8df)
6334 _mm512_setzero_pd (),
6335 (__mmask8) -1);
6336 }
6337
6338 extern __inline __m512d
6339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6340 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6341 __m512d __V, const int __imm)
6342 {
6343 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6344 (__v8df) __V, __imm,
6345 (__v8df) __W,
6346 (__mmask8) __U);
6347 }
6348
6349 extern __inline __m512d
6350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6351 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6352 const int __imm)
6353 {
6354 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6355 (__v8df) __V, __imm,
6356 (__v8df)
6357 _mm512_setzero_pd (),
6358 (__mmask8) __U);
6359 }
6360
6361 extern __inline __m512d
6362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6363 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6364 const int __imm, const int __R)
6365 {
6366 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6367 (__v8df) __B,
6368 (__v8di) __C,
6369 __imm,
6370 (__mmask8) -1, __R);
6371 }
6372
6373 extern __inline __m512d
6374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6376 __m512i __C, const int __imm, const int __R)
6377 {
6378 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6379 (__v8df) __B,
6380 (__v8di) __C,
6381 __imm,
6382 (__mmask8) __U, __R);
6383 }
6384
6385 extern __inline __m512d
6386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6387 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6388 __m512i __C, const int __imm, const int __R)
6389 {
6390 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6391 (__v8df) __B,
6392 (__v8di) __C,
6393 __imm,
6394 (__mmask8) __U, __R);
6395 }
6396
6397 extern __inline __m512
6398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6400 const int __imm, const int __R)
6401 {
6402 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6403 (__v16sf) __B,
6404 (__v16si) __C,
6405 __imm,
6406 (__mmask16) -1, __R);
6407 }
6408
6409 extern __inline __m512
6410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6411 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6412 __m512i __C, const int __imm, const int __R)
6413 {
6414 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6415 (__v16sf) __B,
6416 (__v16si) __C,
6417 __imm,
6418 (__mmask16) __U, __R);
6419 }
6420
6421 extern __inline __m512
6422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6424 __m512i __C, const int __imm, const int __R)
6425 {
6426 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6427 (__v16sf) __B,
6428 (__v16si) __C,
6429 __imm,
6430 (__mmask16) __U, __R);
6431 }
6432
6433 extern __inline __m128d
6434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6436 const int __imm, const int __R)
6437 {
6438 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6439 (__v2df) __B,
6440 (__v2di) __C, __imm,
6441 (__mmask8) -1, __R);
6442 }
6443
6444 extern __inline __m128d
6445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6447 __m128i __C, const int __imm, const int __R)
6448 {
6449 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6450 (__v2df) __B,
6451 (__v2di) __C, __imm,
6452 (__mmask8) __U, __R);
6453 }
6454
6455 extern __inline __m128d
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6458 __m128i __C, const int __imm, const int __R)
6459 {
6460 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6461 (__v2df) __B,
6462 (__v2di) __C,
6463 __imm,
6464 (__mmask8) __U, __R);
6465 }
6466
6467 extern __inline __m128
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6470 const int __imm, const int __R)
6471 {
6472 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6473 (__v4sf) __B,
6474 (__v4si) __C, __imm,
6475 (__mmask8) -1, __R);
6476 }
6477
6478 extern __inline __m128
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6481 __m128i __C, const int __imm, const int __R)
6482 {
6483 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6484 (__v4sf) __B,
6485 (__v4si) __C, __imm,
6486 (__mmask8) __U, __R);
6487 }
6488
6489 extern __inline __m128
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6492 __m128i __C, const int __imm, const int __R)
6493 {
6494 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6495 (__v4sf) __B,
6496 (__v4si) __C, __imm,
6497 (__mmask8) __U, __R);
6498 }
6499
6500 #else
6501 #define _mm512_shuffle_pd(X, Y, C) \
6502 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6503 (__v8df)(__m512d)(Y), (int)(C),\
6504 (__v8df)(__m512d)_mm512_setzero_pd(),\
6505 (__mmask8)-1))
6506
6507 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6508 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6509 (__v8df)(__m512d)(Y), (int)(C),\
6510 (__v8df)(__m512d)(W),\
6511 (__mmask8)(U)))
6512
6513 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6514 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6515 (__v8df)(__m512d)(Y), (int)(C),\
6516 (__v8df)(__m512d)_mm512_setzero_pd(),\
6517 (__mmask8)(U)))
6518
6519 #define _mm512_shuffle_ps(X, Y, C) \
6520 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6521 (__v16sf)(__m512)(Y), (int)(C),\
6522 (__v16sf)(__m512)_mm512_setzero_ps(),\
6523 (__mmask16)-1))
6524
6525 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6526 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6527 (__v16sf)(__m512)(Y), (int)(C),\
6528 (__v16sf)(__m512)(W),\
6529 (__mmask16)(U)))
6530
6531 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6532 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6533 (__v16sf)(__m512)(Y), (int)(C),\
6534 (__v16sf)(__m512)_mm512_setzero_ps(),\
6535 (__mmask16)(U)))
6536
6537 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6538 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6539 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6540 (__mmask8)(-1), (R)))
6541
6542 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6543 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6544 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6545 (__mmask8)(U), (R)))
6546
6547 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6548 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6549 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6550 (__mmask8)(U), (R)))
6551
6552 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6553 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6554 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6555 (__mmask16)(-1), (R)))
6556
6557 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6558 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6559 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6560 (__mmask16)(U), (R)))
6561
6562 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6563 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6564 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6565 (__mmask16)(U), (R)))
6566
6567 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6568 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6569 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6570 (__mmask8)(-1), (R)))
6571
6572 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
6573 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6574 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6575 (__mmask8)(U), (R)))
6576
6577 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
6578 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
6579 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6580 (__mmask8)(U), (R)))
6581
6582 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
6583 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6584 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6585 (__mmask8)(-1), (R)))
6586
6587 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
6588 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6589 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6590 (__mmask8)(U), (R)))
6591
6592 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
6593 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
6594 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6595 (__mmask8)(U), (R)))
6596 #endif
6597
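/* Duplicate odd-indexed (VMOVSHDUP) or even-indexed (VMOVSLDUP)
   single-precision elements.  */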
6598 extern __inline __m512
6599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6600 _mm512_movehdup_ps (__m512 __A)
6601 {
6602 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6603 (__v16sf)
6604 _mm512_setzero_ps (),
6605 (__mmask16) -1);
6606 }
6607
6608 extern __inline __m512
6609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6610 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6611 {
6612 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6613 (__v16sf) __W,
6614 (__mmask16) __U);
6615 }
6616
6617 extern __inline __m512
6618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6619 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6620 {
6621 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6622 (__v16sf)
6623 _mm512_setzero_ps (),
6624 (__mmask16) __U);
6625 }
6626
6627 extern __inline __m512
6628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6629 _mm512_moveldup_ps (__m512 __A)
6630 {
6631 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6632 (__v16sf)
6633 _mm512_setzero_ps (),
6634 (__mmask16) -1);
6635 }
6636
6637 extern __inline __m512
6638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6640 {
6641 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6642 (__v16sf) __W,
6643 (__mmask16) __U);
6644 }
6645
6646 extern __inline __m512
6647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6648 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6649 {
6650 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6651 (__v16sf)
6652 _mm512_setzero_ps (),
6653 (__mmask16) __U);
6654 }
6655
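/* Bitwise OR and XOR (VPORD, VPORQ, VPXORD, VPXORQ).  The 32-bit and
   64-bit forms compute the same bit pattern; the element width only
   matters for the write mask.  */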
6656 extern __inline __m512i
6657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6658 _mm512_or_si512 (__m512i __A, __m512i __B)
6659 {
6660 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6661 (__v16si) __B,
6662 (__v16si)
6663 _mm512_setzero_si512 (),
6664 (__mmask16) -1);
6665 }
6666
6667 extern __inline __m512i
6668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6669 _mm512_or_epi32 (__m512i __A, __m512i __B)
6670 {
6671 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6672 (__v16si) __B,
6673 (__v16si)
6674 _mm512_setzero_si512 (),
6675 (__mmask16) -1);
6676 }
6677
6678 extern __inline __m512i
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6681 {
6682 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6683 (__v16si) __B,
6684 (__v16si) __W,
6685 (__mmask16) __U);
6686 }
6687
6688 extern __inline __m512i
6689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6690 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6691 {
6692 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6693 (__v16si) __B,
6694 (__v16si)
6695 _mm512_setzero_si512 (),
6696 (__mmask16) __U);
6697 }
6698
6699 extern __inline __m512i
6700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6701 _mm512_or_epi64 (__m512i __A, __m512i __B)
6702 {
6703 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6704 (__v8di) __B,
6705 (__v8di)
6706 _mm512_setzero_si512 (),
6707 (__mmask8) -1);
6708 }
6709
6710 extern __inline __m512i
6711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6712 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6713 {
6714 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6715 (__v8di) __B,
6716 (__v8di) __W,
6717 (__mmask8) __U);
6718 }
6719
6720 extern __inline __m512i
6721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6722 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6723 {
6724 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6725 (__v8di) __B,
6726 (__v8di)
6727 _mm512_setzero_si512 (),
6728 (__mmask8) __U);
6729 }
6730
6731 extern __inline __m512i
6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733 _mm512_xor_si512 (__m512i __A, __m512i __B)
6734 {
6735 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6736 (__v16si) __B,
6737 (__v16si)
6738 _mm512_setzero_si512 (),
6739 (__mmask16) -1);
6740 }
6741
6742 extern __inline __m512i
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6745 {
6746 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6747 (__v16si) __B,
6748 (__v16si)
6749 _mm512_setzero_si512 (),
6750 (__mmask16) -1);
6751 }
6752
6753 extern __inline __m512i
6754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6755 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6756 {
6757 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6758 (__v16si) __B,
6759 (__v16si) __W,
6760 (__mmask16) __U);
6761 }
6762
6763 extern __inline __m512i
6764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6765 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6766 {
6767 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6768 (__v16si) __B,
6769 (__v16si)
6770 _mm512_setzero_si512 (),
6771 (__mmask16) __U);
6772 }
6773
6774 extern __inline __m512i
6775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6776 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6777 {
6778 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6779 (__v8di) __B,
6780 (__v8di)
6781 _mm512_setzero_si512 (),
6782 (__mmask8) -1);
6783 }
6784
6785 extern __inline __m512i
6786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6788 {
6789 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6790 (__v8di) __B,
6791 (__v8di) __W,
6792 (__mmask8) __U);
6793 }
6794
6795 extern __inline __m512i
6796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6797 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6798 {
6799 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6800 (__v8di) __B,
6801 (__v8di)
6802 _mm512_setzero_si512 (),
6803 (__mmask8) __U);
6804 }
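
/* Illustrative usage sketch added by the editor, not part of the original
   header; guarded out so the compiled contents are unchanged.  The merge
   forms copy unselected lanes from the first operand, the zero forms clear
   them.  */
#if 0
static __inline __m512i
__avx512f_example_masked_bitwise (__m512i __src, __mmask16 __m,
				  __m512i __a, __m512i __b)
{
  /* Lanes whose mask bit is set get __a | __b; the rest keep __src.  */
  __m512i __merged = _mm512_mask_or_epi32 (__src, __m, __a, __b);
  /* Lanes whose mask bit is set get __a ^ __b; the rest become zero.  */
  __m512i __zeroed = _mm512_maskz_xor_epi32 (__m, __a, __b);
  return _mm512_or_si512 (__merged, __zeroed);
}
#endif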
6805
6806 #ifdef __OPTIMIZE__
6807 extern __inline __m512i
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_rol_epi32 (__m512i __A, const int __B)
6810 {
6811 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6812 (__v16si)
6813 _mm512_setzero_si512 (),
6814 (__mmask16) -1);
6815 }
6816
6817 extern __inline __m512i
6818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6819 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6820 {
6821 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6822 (__v16si) __W,
6823 (__mmask16) __U);
6824 }
6825
6826 extern __inline __m512i
6827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6828 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6829 {
6830 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6831 (__v16si)
6832 _mm512_setzero_si512 (),
6833 (__mmask16) __U);
6834 }
6835
6836 extern __inline __m512i
6837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6838 _mm512_ror_epi32 (__m512i __A, const int __B)
6839 {
6840 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6841 (__v16si)
6842 _mm512_setzero_si512 (),
6843 (__mmask16) -1);
6844 }
6845
6846 extern __inline __m512i
6847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6848 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6849 {
6850 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6851 (__v16si) __W,
6852 (__mmask16) __U);
6853 }
6854
6855 extern __inline __m512i
6856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6857 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, const int __B)
6858 {
6859 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6860 (__v16si)
6861 _mm512_setzero_si512 (),
6862 (__mmask16) __U);
6863 }
6864
6865 extern __inline __m512i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm512_rol_epi64 (__m512i __A, const int __B)
6868 {
6869 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6870 (__v8di)
6871 _mm512_setzero_si512 (),
6872 (__mmask8) -1);
6873 }
6874
6875 extern __inline __m512i
6876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6877 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6878 {
6879 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6880 (__v8di) __W,
6881 (__mmask8) __U);
6882 }
6883
6884 extern __inline __m512i
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6887 {
6888 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6889 (__v8di)
6890 _mm512_setzero_si512 (),
6891 (__mmask8) __U);
6892 }
6893
6894 extern __inline __m512i
6895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6896 _mm512_ror_epi64 (__m512i __A, const int __B)
6897 {
6898 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6899 (__v8di)
6900 _mm512_setzero_si512 (),
6901 (__mmask8) -1);
6902 }
6903
6904 extern __inline __m512i
6905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6906 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6907 {
6908 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6909 (__v8di) __W,
6910 (__mmask8) __U);
6911 }
6912
6913 extern __inline __m512i
6914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6915 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, const int __B)
6916 {
6917 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6918 (__v8di)
6919 _mm512_setzero_si512 (),
6920 (__mmask8) __U);
6921 }
6922
6923 #else
6924 #define _mm512_rol_epi32(A, B) \
6925 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6926 (int)(B), \
6927 (__v16si)_mm512_setzero_si512 (), \
6928 (__mmask16)(-1)))
6929 #define _mm512_mask_rol_epi32(W, U, A, B) \
6930 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6931 (int)(B), \
6932 (__v16si)(__m512i)(W), \
6933 (__mmask16)(U)))
6934 #define _mm512_maskz_rol_epi32(U, A, B) \
6935 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6936 (int)(B), \
6937 (__v16si)_mm512_setzero_si512 (), \
6938 (__mmask16)(U)))
6939 #define _mm512_ror_epi32(A, B) \
6940 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6941 (int)(B), \
6942 (__v16si)_mm512_setzero_si512 (), \
6943 (__mmask16)(-1)))
6944 #define _mm512_mask_ror_epi32(W, U, A, B) \
6945 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6946 (int)(B), \
6947 (__v16si)(__m512i)(W), \
6948 (__mmask16)(U)))
6949 #define _mm512_maskz_ror_epi32(U, A, B) \
6950 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6951 (int)(B), \
6952 (__v16si)_mm512_setzero_si512 (), \
6953 (__mmask16)(U)))
6954 #define _mm512_rol_epi64(A, B) \
6955 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6956 (int)(B), \
6957 (__v8di)_mm512_setzero_si512 (), \
6958 (__mmask8)(-1)))
6959 #define _mm512_mask_rol_epi64(W, U, A, B) \
6960 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6961 (int)(B), \
6962 (__v8di)(__m512i)(W), \
6963 (__mmask8)(U)))
6964 #define _mm512_maskz_rol_epi64(U, A, B) \
6965 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6966 (int)(B), \
6967 (__v8di)_mm512_setzero_si512 (), \
6968 (__mmask8)(U)))
6969
6970 #define _mm512_ror_epi64(A, B) \
6971 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6972 (int)(B), \
6973 (__v8di)_mm512_setzero_si512 (), \
6974 (__mmask8)(-1)))
6975 #define _mm512_mask_ror_epi64(W, U, A, B) \
6976 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6977 (int)(B), \
6978 (__v8di)(__m512i)(W), \
6979 (__mmask8)(U)))
6980 #define _mm512_maskz_ror_epi64(U, A, B) \
6981 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6982 (int)(B), \
6983 (__v8di)_mm512_setzero_si512 (), \
6984 (__mmask8)(U)))
6985 #endif
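
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  The rotate count is
   encoded in the instruction's immediate byte, so it must be a compile-time
   constant; without __OPTIMIZE__ the macro forms above are used instead of
   the inline functions.  */
#if 0
static __inline __m512i
__avx512f_example_rol (__m512i __a)
{
  /* Rotate every 32-bit lane left by 5 bits.  */
  return _mm512_rol_epi32 (__a, 5);
}
#endif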
6986
6987 extern __inline __m512i
6988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6989 _mm512_and_si512 (__m512i __A, __m512i __B)
6990 {
6991 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6992 (__v16si) __B,
6993 (__v16si)
6994 _mm512_setzero_si512 (),
6995 (__mmask16) -1);
6996 }
6997
6998 extern __inline __m512i
6999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7000 _mm512_and_epi32 (__m512i __A, __m512i __B)
7001 {
7002 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7003 (__v16si) __B,
7004 (__v16si)
7005 _mm512_setzero_si512 (),
7006 (__mmask16) -1);
7007 }
7008
7009 extern __inline __m512i
7010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7011 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7012 {
7013 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7014 (__v16si) __B,
7015 (__v16si) __W,
7016 (__mmask16) __U);
7017 }
7018
7019 extern __inline __m512i
7020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7022 {
7023 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7024 (__v16si) __B,
7025 (__v16si)
7026 _mm512_setzero_si512 (),
7027 (__mmask16) __U);
7028 }
7029
7030 extern __inline __m512i
7031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 _mm512_and_epi64 (__m512i __A, __m512i __B)
7033 {
7034 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7035 (__v8di) __B,
7036 (__v8di)
7037 _mm512_setzero_si512 (),
7038 (__mmask8) -1);
7039 }
7040
7041 extern __inline __m512i
7042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7044 {
7045 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7046 (__v8di) __B,
7047 (__v8di) __W, __U);
7048 }
7049
7050 extern __inline __m512i
7051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7052 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7053 {
7054 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7055 (__v8di) __B,
7056 (__v8di)
7057 _mm512_setzero_si512 (),
7058 __U);
7059 }
7060
7061 extern __inline __m512i
7062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7063 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7064 {
7065 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7066 (__v16si) __B,
7067 (__v16si)
7068 _mm512_setzero_si512 (),
7069 (__mmask16) -1);
7070 }
7071
7072 extern __inline __m512i
7073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7074 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7075 {
7076 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7077 (__v16si) __B,
7078 (__v16si)
7079 _mm512_setzero_si512 (),
7080 (__mmask16) -1);
7081 }
7082
7083 extern __inline __m512i
7084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7085 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7086 {
7087 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7088 (__v16si) __B,
7089 (__v16si) __W,
7090 (__mmask16) __U);
7091 }
7092
7093 extern __inline __m512i
7094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7095 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7096 {
7097 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7098 (__v16si) __B,
7099 (__v16si)
7100 _mm512_setzero_si512 (),
7101 (__mmask16) __U);
7102 }
7103
7104 extern __inline __m512i
7105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7107 {
7108 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7109 (__v8di) __B,
7110 (__v8di)
7111 _mm512_setzero_si512 (),
7112 (__mmask8) -1);
7113 }
7114
7115 extern __inline __m512i
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7118 {
7119 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7120 (__v8di) __B,
7121 (__v8di) __W, __U);
7122 }
7123
7124 extern __inline __m512i
7125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7126 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7127 {
7128 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7129 (__v8di) __B,
7130 (__v8di)
7131 _mm512_setzero_si512 (),
7132 __U);
7133 }
7134
7135 extern __inline __mmask16
7136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7137 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7138 {
7139 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7140 (__v16si) __B,
7141 (__mmask16) -1);
7142 }
7143
7144 extern __inline __mmask16
7145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7146 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7147 {
7148 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7149 (__v16si) __B, __U);
7150 }
7151
7152 extern __inline __mmask8
7153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7155 {
7156 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7157 (__v8di) __B,
7158 (__mmask8) -1);
7159 }
7160
7161 extern __inline __mmask8
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7164 {
7165 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7166 }
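
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  VPTESTMD sets a mask
   bit for every lane in which __A & __B is non-zero, so testing a vector
   against itself yields a mask of its non-zero lanes.  */
#if 0
static __inline __mmask16
__avx512f_example_nonzero_lanes (__m512i __a)
{
  return _mm512_test_epi32_mask (__a, __a);
}
#endif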
7167
7168 extern __inline __m512i
7169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7170 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7171 {
7172 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7173 (__v16si) __B,
7174 (__v16si)
7175 _mm512_setzero_si512 (),
7176 (__mmask16) -1);
7177 }
7178
7179 extern __inline __m512i
7180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7181 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7182 __m512i __B)
7183 {
7184 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7185 (__v16si) __B,
7186 (__v16si) __W,
7187 (__mmask16) __U);
7188 }
7189
7190 extern __inline __m512i
7191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7192 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7193 {
7194 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7195 (__v16si) __B,
7196 (__v16si)
7197 _mm512_setzero_si512 (),
7198 (__mmask16) __U);
7199 }
7200
7201 extern __inline __m512i
7202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7203 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7204 {
7205 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7206 (__v8di) __B,
7207 (__v8di)
7208 _mm512_setzero_si512 (),
7209 (__mmask8) -1);
7210 }
7211
7212 extern __inline __m512i
7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7215 {
7216 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7217 (__v8di) __B,
7218 (__v8di) __W,
7219 (__mmask8) __U);
7220 }
7221
7222 extern __inline __m512i
7223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7224 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7225 {
7226 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7227 (__v8di) __B,
7228 (__v8di)
7229 _mm512_setzero_si512 (),
7230 (__mmask8) __U);
7231 }
7232
7233 extern __inline __m512i
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7236 {
7237 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7238 (__v16si) __B,
7239 (__v16si)
7240 _mm512_setzero_si512 (),
7241 (__mmask16) -1);
7242 }
7243
7244 extern __inline __m512i
7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7247 __m512i __B)
7248 {
7249 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7250 (__v16si) __B,
7251 (__v16si) __W,
7252 (__mmask16) __U);
7253 }
7254
7255 extern __inline __m512i
7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7257 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7258 {
7259 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7260 (__v16si) __B,
7261 (__v16si)
7262 _mm512_setzero_si512 (),
7263 (__mmask16) __U);
7264 }
7265
7266 extern __inline __m512i
7267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7268 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7269 {
7270 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7271 (__v8di) __B,
7272 (__v8di)
7273 _mm512_setzero_si512 (),
7274 (__mmask8) -1);
7275 }
7276
7277 extern __inline __m512i
7278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7279 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7280 {
7281 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7282 (__v8di) __B,
7283 (__v8di) __W,
7284 (__mmask8) __U);
7285 }
7286
7287 extern __inline __m512i
7288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7289 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7290 {
7291 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7292 (__v8di) __B,
7293 (__v8di)
7294 _mm512_setzero_si512 (),
7295 (__mmask8) __U);
7296 }
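
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  Like their SSE2/AVX2
   counterparts, the 512-bit unpack intrinsics interleave the low or high
   halves of the two sources within each 128-bit block.  */
#if 0
static __inline __m512i
__avx512f_example_interleave (__m512i __a, __m512i __b)
{
  /* Per 128-bit block the result is a0, b0, a1, b1.  */
  return _mm512_unpacklo_epi32 (__a, __b);
}
#endif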
7297
7298 #ifdef __x86_64__
7299 #ifdef __OPTIMIZE__
7300 extern __inline unsigned long long
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7303 {
7304 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7305 }
7306
7307 extern __inline long long
7308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7309 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7310 {
7311 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7312 }
7313
7314 extern __inline long long
7315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7316 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7317 {
7318 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7319 }
7320
7321 extern __inline unsigned long long
7322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7324 {
7325 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7326 }
7327
7328 extern __inline long long
7329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7330 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7331 {
7332 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7333 }
7334
7335 extern __inline long long
7336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7337 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7338 {
7339 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7340 }
7341 #else
7342 #define _mm_cvt_roundss_u64(A, B) \
7343 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7344
7345 #define _mm_cvt_roundss_si64(A, B) \
7346 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7347
7348 #define _mm_cvt_roundss_i64(A, B) \
7349 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7350
7351 #define _mm_cvtt_roundss_u64(A, B) \
7352 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7353
7354 #define _mm_cvtt_roundss_i64(A, B) \
7355 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7356
7357 #define _mm_cvtt_roundss_si64(A, B) \
7358 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7359 #endif
7360 #endif
7361
7362 #ifdef __OPTIMIZE__
7363 extern __inline unsigned
7364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7365 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7366 {
7367 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7368 }
7369
7370 extern __inline int
7371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7373 {
7374 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7375 }
7376
7377 extern __inline int
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7380 {
7381 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7382 }
7383
7384 extern __inline unsigned
7385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7386 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7387 {
7388 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7389 }
7390
7391 extern __inline int
7392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7393 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7394 {
7395 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7396 }
7397
7398 extern __inline int
7399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7400 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7401 {
7402 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7403 }
7404 #else
7405 #define _mm_cvt_roundss_u32(A, B) \
7406 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7407
7408 #define _mm_cvt_roundss_si32(A, B) \
7409 ((int)__builtin_ia32_vcvtss2si32(A, B))
7410
7411 #define _mm_cvt_roundss_i32(A, B) \
7412 ((int)__builtin_ia32_vcvtss2si32(A, B))
7413
7414 #define _mm_cvtt_roundss_u32(A, B) \
7415 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7416
7417 #define _mm_cvtt_roundss_si32(A, B) \
7418 ((int)__builtin_ia32_vcvttss2si32(A, B))
7419
7420 #define _mm_cvtt_roundss_i32(A, B) \
7421 ((int)__builtin_ia32_vcvttss2si32(A, B))
7422 #endif
7423
7424 #ifdef __x86_64__
7425 #ifdef __OPTIMIZE__
7426 extern __inline unsigned long long
7427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7428 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7429 {
7430 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7431 }
7432
7433 extern __inline long long
7434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7435 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7436 {
7437 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7438 }
7439
7440 extern __inline long long
7441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7442 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7443 {
7444 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7445 }
7446
7447 extern __inline unsigned long long
7448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7449 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7450 {
7451 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7452 }
7453
7454 extern __inline long long
7455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7456 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7457 {
7458 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7459 }
7460
7461 extern __inline long long
7462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7463 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7464 {
7465 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7466 }
7467 #else
7468 #define _mm_cvt_roundsd_u64(A, B) \
7469 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7470
7471 #define _mm_cvt_roundsd_si64(A, B) \
7472 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7473
7474 #define _mm_cvt_roundsd_i64(A, B) \
7475 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7476
7477 #define _mm_cvtt_roundsd_u64(A, B) \
7478 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7479
7480 #define _mm_cvtt_roundsd_si64(A, B) \
7481 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7482
7483 #define _mm_cvtt_roundsd_i64(A, B) \
7484 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7485 #endif
7486 #endif
7487
7488 #ifdef __OPTIMIZE__
7489 extern __inline unsigned
7490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7491 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7492 {
7493 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7494 }
7495
7496 extern __inline int
7497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7498 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7499 {
7500 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7501 }
7502
7503 extern __inline int
7504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7505 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7506 {
7507 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7508 }
7509
7510 extern __inline unsigned
7511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7512 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7513 {
7514 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7515 }
7516
7517 extern __inline int
7518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7519 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7520 {
7521 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7522 }
7523
7524 extern __inline int
7525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7526 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7527 {
7528 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7529 }
7530 #else
7531 #define _mm_cvt_roundsd_u32(A, B) \
7532 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7533
7534 #define _mm_cvt_roundsd_si32(A, B) \
7535 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7536
7537 #define _mm_cvt_roundsd_i32(A, B) \
7538 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7539
7540 #define _mm_cvtt_roundsd_u32(A, B) \
7541 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7542
7543 #define _mm_cvtt_roundsd_si32(A, B) \
7544 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7545
7546 #define _mm_cvtt_roundsd_i32(A, B) \
7547 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7548 #endif
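
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  The _cvt_round
   variants take the rounding behaviour as an argument rather than reading
   MXCSR, and the _cvtt_round variants always truncate toward zero.  */
#if 0
static __inline int
__avx512f_example_cvt (__m128d __a)
{
  /* Convert the low double to int using the current rounding direction.  */
  return _mm_cvt_roundsd_i32 (__a, _MM_FROUND_CUR_DIRECTION);
}
#endif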
7549
7550 extern __inline __m512d
7551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7552 _mm512_movedup_pd (__m512d __A)
7553 {
7554 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7555 (__v8df)
7556 _mm512_setzero_pd (),
7557 (__mmask8) -1);
7558 }
7559
7560 extern __inline __m512d
7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7563 {
7564 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7565 (__v8df) __W,
7566 (__mmask8) __U);
7567 }
7568
7569 extern __inline __m512d
7570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7571 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7572 {
7573 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7574 (__v8df)
7575 _mm512_setzero_pd (),
7576 (__mmask8) __U);
7577 }
7578
7579 extern __inline __m512d
7580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7581 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7582 {
7583 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7584 (__v8df) __B,
7585 (__v8df)
7586 _mm512_setzero_pd (),
7587 (__mmask8) -1);
7588 }
7589
7590 extern __inline __m512d
7591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7592 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7593 {
7594 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7595 (__v8df) __B,
7596 (__v8df) __W,
7597 (__mmask8) __U);
7598 }
7599
7600 extern __inline __m512d
7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7602 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7603 {
7604 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7605 (__v8df) __B,
7606 (__v8df)
7607 _mm512_setzero_pd (),
7608 (__mmask8) __U);
7609 }
7610
7611 extern __inline __m512d
7612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7613 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7614 {
7615 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7616 (__v8df) __B,
7617 (__v8df)
7618 _mm512_setzero_pd (),
7619 (__mmask8) -1);
7620 }
7621
7622 extern __inline __m512d
7623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7625 {
7626 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7627 (__v8df) __B,
7628 (__v8df) __W,
7629 (__mmask8) __U);
7630 }
7631
7632 extern __inline __m512d
7633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7634 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7635 {
7636 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7637 (__v8df) __B,
7638 (__v8df)
7639 _mm512_setzero_pd (),
7640 (__mmask8) __U);
7641 }
7642
7643 extern __inline __m512
7644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7645 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7646 {
7647 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7648 (__v16sf) __B,
7649 (__v16sf)
7650 _mm512_setzero_ps (),
7651 (__mmask16) -1);
7652 }
7653
7654 extern __inline __m512
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7657 {
7658 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7659 (__v16sf) __B,
7660 (__v16sf) __W,
7661 (__mmask16) __U);
7662 }
7663
7664 extern __inline __m512
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7667 {
7668 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7669 (__v16sf) __B,
7670 (__v16sf)
7671 _mm512_setzero_ps (),
7672 (__mmask16) __U);
7673 }
7674
7675 #ifdef __OPTIMIZE__
7676 extern __inline __m512d
7677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7678 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7679 {
7680 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7681 (__v8df)
7682 _mm512_setzero_pd (),
7683 (__mmask8) -1, __R);
7684 }
7685
7686 extern __inline __m512d
7687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7688 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7689 const int __R)
7690 {
7691 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7692 (__v8df) __W,
7693 (__mmask8) __U, __R);
7694 }
7695
7696 extern __inline __m512d
7697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7699 {
7700 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7701 (__v8df)
7702 _mm512_setzero_pd (),
7703 (__mmask8) __U, __R);
7704 }
7705
7706 extern __inline __m512
7707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7709 {
7710 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7711 (__v16sf)
7712 _mm512_setzero_ps (),
7713 (__mmask16) -1, __R);
7714 }
7715
7716 extern __inline __m512
7717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7718 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7719 const int __R)
7720 {
7721 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7722 (__v16sf) __W,
7723 (__mmask16) __U, __R);
7724 }
7725
7726 extern __inline __m512
7727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7728 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7729 {
7730 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7731 (__v16sf)
7732 _mm512_setzero_ps (),
7733 (__mmask16) __U, __R);
7734 }
7735
7736 extern __inline __m256i
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7739 {
7740 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7741 __I,
7742 (__v16hi)
7743 _mm256_setzero_si256 (),
7744 -1);
7745 }
7746
7747 extern __inline __m256i
7748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7749 _mm512_cvtps_ph (__m512 __A, const int __I)
7750 {
7751 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7752 __I,
7753 (__v16hi)
7754 _mm256_setzero_si256 (),
7755 -1);
7756 }
7757
7758 extern __inline __m256i
7759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7760 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7761 const int __I)
7762 {
7763 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7764 __I,
7765 (__v16hi) __U,
7766 (__mmask16) __W);
7767 }
7768
7769 extern __inline __m256i
7770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7771 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7772 {
7773 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7774 __I,
7775 (__v16hi) __U,
7776 (__mmask16) __W);
7777 }
7778
7779 extern __inline __m256i
7780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7782 {
7783 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7784 __I,
7785 (__v16hi)
7786 _mm256_setzero_si256 (),
7787 (__mmask16) __W);
7788 }
7789
7790 extern __inline __m256i
7791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7793 {
7794 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7795 __I,
7796 (__v16hi)
7797 _mm256_setzero_si256 (),
7798 (__mmask16) __W);
7799 }
7800 #else
7801 #define _mm512_cvt_roundps_pd(A, B) \
7802 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, B)
7803
7804 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7805 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
7806
7807 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7808 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
7809
7810 #define _mm512_cvt_roundph_ps(A, B) \
7811 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
7812
7813 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7814 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
7815
7816 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7817 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
7818
7819 #define _mm512_cvt_roundps_ph(A, I) \
7820 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7821 (__v16hi)_mm256_setzero_si256 (), -1))
7822 #define _mm512_cvtps_ph(A, I) \
7823 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7824 (__v16hi)_mm256_setzero_si256 (), -1))
7825 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7826 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7827 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7828 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7829 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7830 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7831 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7832 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7833 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7834 #define _mm512_maskz_cvtps_ph(W, A, I) \
7835 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7836 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7837 #endif
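
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  VCVTPS2PH narrows
   sixteen floats to sixteen half-precision values packed in a __m256i, and
   VCVTPH2PS widens them back.  */
#if 0
static __inline __m512
__avx512f_example_ph_round_trip (__m512 __a)
{
  __m256i __half = _mm512_cvtps_ph (__a, _MM_FROUND_TO_NEAREST_INT);
  return _mm512_cvt_roundph_ps (__half, _MM_FROUND_CUR_DIRECTION);
}
#endif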
7838
7839 #ifdef __OPTIMIZE__
7840 extern __inline __m256
7841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7842 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7843 {
7844 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7845 (__v8sf)
7846 _mm256_setzero_ps (),
7847 (__mmask8) -1, __R);
7848 }
7849
7850 extern __inline __m256
7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7853 const int __R)
7854 {
7855 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7856 (__v8sf) __W,
7857 (__mmask8) __U, __R);
7858 }
7859
7860 extern __inline __m256
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7863 {
7864 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7865 (__v8sf)
7866 _mm256_setzero_ps (),
7867 (__mmask8) __U, __R);
7868 }
7869
7870 extern __inline __m128
7871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7872 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7873 {
7874 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7875 (__v2df) __B,
7876 __R);
7877 }
7878
7879 extern __inline __m128d
7880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7881 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7882 {
7883 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7884 (__v4sf) __B,
7885 __R);
7886 }
7887 #else
7888 #define _mm512_cvt_roundpd_ps(A, B) \
7889 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B)
7890
7891 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
7892 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
7893
7894 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
7895 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
7896
7897 #define _mm_cvt_roundsd_ss(A, B, C) \
7898 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
7899
7900 #define _mm_cvt_roundss_sd(A, B, C) \
7901 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
7902 #endif
7903
7904 extern __inline void
7905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7906 _mm512_stream_si512 (__m512i * __P, __m512i __A)
7907 {
7908 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
7909 }
7910
7911 extern __inline void
7912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7913 _mm512_stream_ps (float *__P, __m512 __A)
7914 {
7915 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
7916 }
7917
7918 extern __inline void
7919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7920 _mm512_stream_pd (double *__P, __m512d __A)
7921 {
7922 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
7923 }
7924
7925 extern __inline __m512i
7926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7927 _mm512_stream_load_si512 (void *__P)
7928 {
7929 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
7930 }
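
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  The streaming store
   and load intrinsics use non-temporal hints and require 64-byte aligned
   addresses.  */
#if 0
static __inline void
__avx512f_example_stream_copy (__m512i *__dst, __m512i *__src)
{
  /* Both pointers are assumed to be 64-byte aligned.  */
  _mm512_stream_si512 (__dst, _mm512_stream_load_si512 (__src));
}
#endif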
7931
7932 #ifdef __OPTIMIZE__
7933 extern __inline __m128
7934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7935 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
7936 {
7937 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
7938 (__v4sf) __B,
7939 __R);
7940 }
7941
7942 extern __inline __m128d
7943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7944 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
7945 {
7946 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
7947 (__v2df) __B,
7948 __R);
7949 }
7950
7951 extern __inline __m512
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm512_getexp_round_ps (__m512 __A, const int __R)
7954 {
7955 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7956 (__v16sf)
7957 _mm512_setzero_ps (),
7958 (__mmask16) -1, __R);
7959 }
7960
7961 extern __inline __m512
7962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7963 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
7964 const int __R)
7965 {
7966 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7967 (__v16sf) __W,
7968 (__mmask16) __U, __R);
7969 }
7970
7971 extern __inline __m512
7972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7973 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
7974 {
7975 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7976 (__v16sf)
7977 _mm512_setzero_ps (),
7978 (__mmask16) __U, __R);
7979 }
7980
7981 extern __inline __m512d
7982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7983 _mm512_getexp_round_pd (__m512d __A, const int __R)
7984 {
7985 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7986 (__v8df)
7987 _mm512_setzero_pd (),
7988 (__mmask8) -1, __R);
7989 }
7990
7991 extern __inline __m512d
7992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7993 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
7994 const int __R)
7995 {
7996 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7997 (__v8df) __W,
7998 (__mmask8) __U, __R);
7999 }
8000
8001 extern __inline __m512d
8002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8003 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8004 {
8005 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8006 (__v8df)
8007 _mm512_setzero_pd (),
8008 (__mmask8) __U, __R);
8009 }
8010
8011 /* Constants for mantissa extraction, used by the getmant intrinsics below. */
8012 typedef enum
8013 {
8014 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8015 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8016 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8017 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8018 } _MM_MANTISSA_NORM_ENUM;
8019
8020 typedef enum
8021 {
8022 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8023 _MM_MANT_SIGN_zero, /* sign = 0 */
8024 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8025 } _MM_MANTISSA_SIGN_ENUM;
8026
8027 extern __inline __m512d
8028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8029 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8030 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8031 {
8032 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8033 (__C << 2) | __B,
8034 (__v8df) _mm512_setzero_pd (),
8035 (__mmask8) -1, __R);
8036 }
8037
8038 extern __inline __m512d
8039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8040 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8041 _MM_MANTISSA_NORM_ENUM __B,
8042 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8043 {
8044 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8045 (__C << 2) | __B,
8046 (__v8df) __W, __U,
8047 __R);
8048 }
8049
8050 extern __inline __m512d
8051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8052 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8053 _MM_MANTISSA_NORM_ENUM __B,
8054 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8055 {
8056 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8057 (__C << 2) | __B,
8058 (__v8df)
8059 _mm512_setzero_pd (),
8060 __U, __R);
8061 }
8062
8063 extern __inline __m512
8064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8065 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8066 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8067 {
8068 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8069 (__C << 2) | __B,
8070 (__v16sf) _mm512_setzero_ps (),
8071 (__mmask16) -1, __R);
8072 }
8073
8074 extern __inline __m512
8075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8076 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8077 _MM_MANTISSA_NORM_ENUM __B,
8078 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8079 {
8080 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8081 (__C << 2) | __B,
8082 (__v16sf) __W, __U,
8083 __R);
8084 }
8085
8086 extern __inline __m512
8087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8089 _MM_MANTISSA_NORM_ENUM __B,
8090 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8091 {
8092 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8093 (__C << 2) | __B,
8094 (__v16sf)
8095 _mm512_setzero_ps (),
8096 __U, __R);
8097 }
8098
8099 extern __inline __m128d
8100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8101 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8102 _MM_MANTISSA_NORM_ENUM __C,
8103 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8104 {
8105 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8106 (__v2df) __B,
8107 (__D << 2) | __C,
8108 __R);
8109 }
8110
8111 extern __inline __m128
8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8114 _MM_MANTISSA_NORM_ENUM __C,
8115 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8116 {
8117 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8118 (__v4sf) __B,
8119 (__D << 2) | __C,
8120 __R);
8121 }
8122
8123 #else
8124 #define _mm512_getmant_round_pd(X, B, C, R) \
8125 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8126 (int)(((C)<<2) | (B)), \
8127 (__v8df)(__m512d)_mm512_setzero_pd(), \
8128 (__mmask8)-1,\
8129 (R)))
8130
8131 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8132 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8133 (int)(((C)<<2) | (B)), \
8134 (__v8df)(__m512d)(W), \
8135 (__mmask8)(U),\
8136 (R)))
8137
8138 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8139 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8140 (int)(((C)<<2) | (B)), \
8141 (__v8df)(__m512d)_mm512_setzero_pd(), \
8142 (__mmask8)(U),\
8143 (R)))
8144 #define _mm512_getmant_round_ps(X, B, C, R) \
8145 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8146 (int)(((C)<<2) | (B)), \
8147 (__v16sf)(__m512)_mm512_setzero_ps(), \
8148 (__mmask16)-1,\
8149 (R)))
8150
8151 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8152 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8153 (int)(((C)<<2) | (B)), \
8154 (__v16sf)(__m512)(W), \
8155 (__mmask16)(U),\
8156 (R)))
8157
8158 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8159 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8160 (int)(((C)<<2) | (B)), \
8161 (__v16sf)(__m512)_mm512_setzero_ps(), \
8162 (__mmask16)(U),\
8163 (R)))
8164 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8165 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8166 (__v2df)(__m128d)(Y), \
8167 (int)(((D)<<2) | (C)), \
8168 (R)))
8169
8170 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8171 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8172 (__v4sf)(__m128)(Y), \
8173 (int)(((D)<<2) | (C)), \
8174 (R)))
8175
8176 #define _mm_getexp_round_ss(A, B, R) \
8177 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8178
8179 #define _mm_getexp_round_sd(A, B, R) \
8180 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8181
8182 #define _mm512_getexp_round_ps(A, R) \
8183 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8184 (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R))
8185
8186 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8187 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8188 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8189
8190 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8191 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8192 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8193
8194 #define _mm512_getexp_round_pd(A, R) \
8195 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8196 (__v8df)_mm512_setzero_pd(), (__mmask8)-1, R))
8197
8198 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8199 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8200 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8201
8202 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8203 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8204 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8205 #endif
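
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  The normalization
   interval and sign enums are combined into a single immediate as
   (sign << 2) | interval, as the definitions above encode.  */
#if 0
static __inline __m512d
__avx512f_example_getmant (__m512d __a)
{
  /* Normalise each mantissa into [1, 2), keeping the source sign.  */
  return _mm512_getmant_round_pd (__a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src,
				  _MM_FROUND_CUR_DIRECTION);
}
#endif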
8206
8207 #ifdef __OPTIMIZE__
8208 extern __inline __m512
8209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8210 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8211 {
8212 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8213 (__v16sf) __A, -1, __R);
8214 }
8215
8216 extern __inline __m512
8217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8219 const int __imm, const int __R)
8220 {
8221 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8222 (__v16sf) __A,
8223 (__mmask16) __B, __R);
8224 }
8225
8226 extern __inline __m512
8227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8229 const int __imm, const int __R)
8230 {
8231 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8232 __imm,
8233 (__v16sf)
8234 _mm512_setzero_ps (),
8235 (__mmask16) __A, __R);
8236 }
8237
8238 extern __inline __m512d
8239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8240 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8241 {
8242 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8243 (__v8df) __A, -1, __R);
8244 }
8245
8246 extern __inline __m512d
8247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8248 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8249 __m512d __C, const int __imm, const int __R)
8250 {
8251 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8252 (__v8df) __A,
8253 (__mmask8) __B, __R);
8254 }
8255
8256 extern __inline __m512d
8257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8258 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8259 const int __imm, const int __R)
8260 {
8261 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8262 __imm,
8263 (__v8df)
8264 _mm512_setzero_pd (),
8265 (__mmask8) __A, __R);
8266 }
8267
8268 extern __inline __m128
8269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8270 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8271 {
8272 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8273 (__v4sf) __B, __imm, __R);
8274 }
8275
8276 extern __inline __m128d
8277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8278 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8279 const int __R)
8280 {
8281 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8282 (__v2df) __B, __imm, __R);
8283 }
8284
8285 #else
8286 #define _mm512_roundscale_round_ps(A, B, R) \
8287 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8288 (__v16sf)(__m512)(A), (__mmask16)(-1), R))
8289 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8290 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8291 (int)(D), \
8292 (__v16sf)(__m512)(A), \
8293 (__mmask16)(B), R))
8294 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8295 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8296 (int)(C), \
8297 (__v16sf)_mm512_setzero_ps(),\
8298 (__mmask16)(A), R))
8299 #define _mm512_roundscale_round_pd(A, B, R) \
8300 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8301 (__v8df)(__m512d)(A), (__mmask8)(-1), R))
8302 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8303 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8304 (int)(D), \
8305 (__v8df)(__m512d)(A), \
8306 (__mmask8)(B), R))
8307 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8308 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8309 (int)(C), \
8310 (__v8df)_mm512_setzero_pd(),\
8311 (__mmask8)(A), R))
8312 #define _mm_roundscale_round_ss(A, B, C, R) \
8313 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8314 (__v4sf)(__m128)(B), (int)(C), R))
8315 #define _mm_roundscale_round_sd(A, B, C, R) \
8316 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8317 (__v2df)(__m128d)(B), (int)(C), R))
8318 #endif
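
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  Bits 7:4 of the
   roundscale immediate give the number of fraction bits to preserve and
   bits 1:0 the rounding mode; an immediate of 0 therefore rounds to the
   nearest whole number, which is how the floor/ceil helpers below are
   built from the same builtin.  */
#if 0
static __inline __m512
__avx512f_example_round_to_nearest (__m512 __a)
{
  return _mm512_roundscale_round_ps (__a, _MM_FROUND_TO_NEAREST_INT,
				     _MM_FROUND_CUR_DIRECTION);
}
#endif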
8319
8320 extern __inline __m512
8321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8322 _mm512_floor_ps (__m512 __A)
8323 {
8324 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8325 _MM_FROUND_FLOOR,
8326 (__v16sf) __A, -1,
8327 _MM_FROUND_CUR_DIRECTION);
8328 }
8329
8330 extern __inline __m512d
8331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8332 _mm512_floor_pd (__m512d __A)
8333 {
8334 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8335 _MM_FROUND_FLOOR,
8336 (__v8df) __A, -1,
8337 _MM_FROUND_CUR_DIRECTION);
8338 }
8339
8340 extern __inline __m512
8341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8342 _mm512_ceil_ps (__m512 __A)
8343 {
8344 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8345 _MM_FROUND_CEIL,
8346 (__v16sf) __A, -1,
8347 _MM_FROUND_CUR_DIRECTION);
8348 }
8349
8350 extern __inline __m512d
8351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8352 _mm512_ceil_pd (__m512d __A)
8353 {
8354 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8355 _MM_FROUND_CEIL,
8356 (__v8df) __A, -1,
8357 _MM_FROUND_CUR_DIRECTION);
8358 }
8359
8360 extern __inline __m512
8361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8362 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8363 {
8364 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8365 _MM_FROUND_FLOOR,
8366 (__v16sf) __W, __U,
8367 _MM_FROUND_CUR_DIRECTION);
8368 }
8369
8370 extern __inline __m512d
8371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8372 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8373 {
8374 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8375 _MM_FROUND_FLOOR,
8376 (__v8df) __W, __U,
8377 _MM_FROUND_CUR_DIRECTION);
8378 }
8379
8380 extern __inline __m512
8381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8383 {
8384 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8385 _MM_FROUND_CEIL,
8386 (__v16sf) __W, __U,
8387 _MM_FROUND_CUR_DIRECTION);
8388 }
8389
8390 extern __inline __m512d
8391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8392 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8393 {
8394 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8395 _MM_FROUND_CEIL,
8396 (__v8df) __W, __U,
8397 _MM_FROUND_CUR_DIRECTION);
8398 }
8399
8400 extern __inline __m512
8401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8402 _mm512_maskz_floor_ps (__mmask16 __U, __m512 __A)
8403 {
8404 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8405 _MM_FROUND_FLOOR,
8406 (__v16sf)
8407 _mm512_setzero_ps (),
8408 __U,
8409 _MM_FROUND_CUR_DIRECTION);
8410 }
8411
8412 extern __inline __m512d
8413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414 _mm512_maskz_floor_pd (__mmask8 __U, __m512d __A)
8415 {
8416 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8417 _MM_FROUND_FLOOR,
8418 (__v8df)
8419 _mm512_setzero_pd (),
8420 __U,
8421 _MM_FROUND_CUR_DIRECTION);
8422 }
8423
8424 extern __inline __m512
8425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8426 _mm512_maskz_ceil_ps (__mmask16 __U, __m512 __A)
8427 {
8428 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8429 _MM_FROUND_CEIL,
8430 (__v16sf)
8431 _mm512_setzero_ps (),
8432 __U,
8433 _MM_FROUND_CUR_DIRECTION);
8434 }
8435
8436 extern __inline __m512d
8437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8438 _mm512_maskz_ceil_pd (__mmask8 __U, __m512d __A)
8439 {
8440 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8441 _MM_FROUND_CEIL,
8442 (__v8df)
8443 _mm512_setzero_pd (),
8444 __U,
8445 _MM_FROUND_CUR_DIRECTION);
8446 }
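
/* Illustrative sketch added by the editor, not part of the original header;
   guarded out so the compiled contents are unchanged.  The floor/ceil
   wrappers follow the usual merge and zero masking conventions.  */
#if 0
static __inline __m512d
__avx512f_example_masked_floor (__m512d __src, __mmask8 __m, __m512d __a)
{
  /* Selected lanes are floored; the rest are copied from __src.  */
  return _mm512_mask_floor_pd (__src, __m, __a);
}
#endif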
8447
8448 #ifdef __OPTIMIZE__
8449 extern __inline __m512
8450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8451 _mm512_floor_round_ps (__m512 __A, const int __R)
8452 {
8453 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8454 _MM_FROUND_FLOOR,
8455 (__v16sf) __A, -1, __R);
8456 }
8457
8458 extern __inline __m512d
8459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8460 _mm512_floor_round_pd (__m512d __A, const int __R)
8461 {
8462 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8463 _MM_FROUND_FLOOR,
8464 (__v8df) __A, -1, __R);
8465 }
8466
8467 extern __inline __m512
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm512_ceil_round_ps (__m512 __A, const int __R)
8470 {
8471 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8472 _MM_FROUND_CEIL,
8473 (__v16sf) __A, -1, __R);
8474 }
8475
8476 extern __inline __m512d
8477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8478 _mm512_ceil_round_pd (__m512d __A, const int __R)
8479 {
8480 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8481 _MM_FROUND_CEIL,
8482 (__v8df) __A, -1, __R);
8483 }
8484
8485 extern __inline __m512
8486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8487 _mm512_mask_floor_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8488 const int __R)
8489 {
8490 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8491 _MM_FROUND_FLOOR,
8492 (__v16sf) __W, __U, __R);
8493 }
8494
8495 extern __inline __m512d
8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8497 _mm512_mask_floor_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8498 const int __R)
8499 {
8500 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8501 _MM_FROUND_FLOOR,
8502 (__v8df) __W, __U, __R);
8503 }
8504
8505 extern __inline __m512
8506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8507 _mm512_mask_ceil_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
8508 {
8509 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8510 _MM_FROUND_CEIL,
8511 (__v16sf) __W, __U, __R);
8512 }
8513
8514 extern __inline __m512d
8515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8516 _mm512_mask_ceil_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8517 const int __R)
8518 {
8519 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8520 _MM_FROUND_CEIL,
8521 (__v8df) __W, __U, __R);
8522 }
8523
8524 extern __inline __m512
8525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8526 _mm512_maskz_floor_round_ps (__mmask16 __U, __m512 __A, const int __R)
8527 {
8528 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8529 _MM_FROUND_FLOOR,
8530 (__v16sf)
8531 _mm512_setzero_ps (),
8532 __U, __R);
8533 }
8534
8535 extern __inline __m512d
8536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8537 _mm512_maskz_floor_round_pd (__mmask8 __U, __m512d __A, const int __R)
8538 {
8539 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8540 _MM_FROUND_FLOOR,
8541 (__v8df)
8542 _mm512_setzero_pd (),
8543 __U, __R);
8544 }
8545
8546 extern __inline __m512
8547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8548 _mm512_maskz_ceil_round_ps (__mmask16 __U, __m512 __A, const int __R)
8549 {
8550 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8551 _MM_FROUND_CEIL,
8552 (__v16sf)
8553 _mm512_setzero_ps (),
8554 __U, __R);
8555 }
8556
8557 extern __inline __m512d
8558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8559 _mm512_maskz_ceil_round_pd (__mmask8 __U, __m512d __A, const int __R)
8560 {
8561 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8562 _MM_FROUND_CEIL,
8563 (__v8df)
8564 _mm512_setzero_pd (),
8565 __U, __R);
8566 }
8567
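/* The valignd/valignq intrinsics below concatenate __A (most significant
   half) and __B (least significant half), shift the 1024-bit intermediate
   right by __imm 32-bit (resp. 64-bit) elements and return the low 512
   bits.  E.g. _mm512_alignr_epi32 (__A, __B, 1) drops __B[0] and pulls in
   __A[0] as the new top element.  */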
8568 extern __inline __m512i
8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8570 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8571 {
8572 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8573 (__v16si) __B, __imm,
8574 (__v16si)
8575 _mm512_setzero_si512 (),
8576 (__mmask16) -1);
8577 }
8578
8579 extern __inline __m512i
8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8581 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8582 __m512i __B, const int __imm)
8583 {
8584 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8585 (__v16si) __B, __imm,
8586 (__v16si) __W,
8587 (__mmask16) __U);
8588 }
8589
8590 extern __inline __m512i
8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8593 const int __imm)
8594 {
8595 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8596 (__v16si) __B, __imm,
8597 (__v16si)
8598 _mm512_setzero_si512 (),
8599 (__mmask16) __U);
8600 }
8601
8602 extern __inline __m512i
8603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8605 {
8606 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8607 (__v8di) __B, __imm,
8608 (__v8di)
8609 _mm512_setzero_si512 (),
8610 (__mmask8) -1);
8611 }
8612
8613 extern __inline __m512i
8614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8615 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8616 __m512i __B, const int __imm)
8617 {
8618 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8619 (__v8di) __B, __imm,
8620 (__v8di) __W,
8621 (__mmask8) __U);
8622 }
8623
8624 extern __inline __m512i
8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8627 const int __imm)
8628 {
8629 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8630 (__v8di) __B, __imm,
8631 (__v8di)
8632 _mm512_setzero_si512 (),
8633 (__mmask8) __U);
8634 }
8635 #else
8636 #define _mm512_floor_round_ps(A, R) \
8637 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \
8638 _MM_FROUND_FLOOR, \
8639 (__v16sf)(__m512)(A), \
8640 (__mmask16)(-1), R))
8641 #define _mm512_mask_floor_round_ps(A, B, C, R) \
8642 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8643 _MM_FROUND_FLOOR, \
8644 (__v16sf)(__m512)(A), \
8645 (__mmask16)(B), R))
8646 #define _mm512_maskz_floor_round_ps(A, B, R) \
8647 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8648 _MM_FROUND_FLOOR, \
8649 (__v16sf)_mm512_setzero_ps(),\
8650 (__mmask16)(A), R))
8651 #define _mm512_floor_round_pd(A, R) \
8652 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \
8653 _MM_FROUND_FLOOR, \
8654 (__v8df)(__m512d)(A), \
8655 (__mmask8)(-1), R))
8656 #define _mm512_mask_floor_round_pd(A, B, C, R) \
8657 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8658 _MM_FROUND_FLOOR, \
8659 (__v8df)(__m512d)(A), \
8660 (__mmask8)(B), R))
8661 #define _mm512_maskz_floor_round_pd(A, B, R) \
8662 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8663 _MM_FROUND_FLOOR, \
8664 (__v8df)_mm512_setzero_pd(),\
8665 (__mmask8)(A), R))
8666 #define _mm512_ceil_round_ps(A, R) \
8667 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \
8668 _MM_FROUND_CEIL, \
8669 (__v16sf)(__m512)(A), \
8670 (__mmask16)(-1), R))
8671 #define _mm512_mask_ceil_round_ps(A, B, C, R) \
8672 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8673 _MM_FROUND_CEIL, \
8674 (__v16sf)(__m512)(A), \
8675 (__mmask16)(B), R))
8676 #define _mm512_maskz_ceil_round_ps(A, B, R) \
8677 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8678 _MM_FROUND_CEIL, \
8679 (__v16sf)_mm512_setzero_ps(),\
8680 (__mmask16)(A), R))
8681 #define _mm512_ceil_round_pd(A, R) \
8682 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \
8683 _MM_FROUND_CEIL, \
8684 (__v8df)(__m512d)(A), \
8685 (__mmask8)(-1), R))
8686 #define _mm512_mask_ceil_round_pd(A, B, C, R) \
8687 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8688 _MM_FROUND_CEIL, \
8689 (__v8df)(__m512d)(A), \
8690 (__mmask8)(B), R))
8691 #define _mm512_maskz_ceil_round_pd(A, B, R) \
8692 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8693 _MM_FROUND_CEIL, \
8694 (__v8df)_mm512_setzero_pd(),\
8695 (__mmask8)(A), R))
8696
8697 #define _mm512_alignr_epi32(X, Y, C) \
8698 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8699 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \
8700 (__mmask16)-1))
8701
8702 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8703 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8704 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8705 (__mmask16)(U)))
8706
8707 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8708 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8709 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\
8710 (__mmask16)(U)))
8711
8712 #define _mm512_alignr_epi64(X, Y, C) \
8713 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8714 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1))
8715
8716 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8717 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8718 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8719
8720 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8721 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8722 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\
8723 (__mmask8)(U)))
8724 #endif
8725
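/* Integer comparisons.  Each intrinsic below yields one mask bit per
   element; the _mm512_mask_* forms additionally AND the result with the
   supplied write mask, so for arbitrary vectors __A, __B and write mask __U

     __mmask16 __M = _mm512_mask_cmpeq_epi32_mask (__U, __A, __B);

   sets bit i of __M only when bit i of __U is set and __A[i] == __B[i].  */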
8726 extern __inline __mmask16
8727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8729 {
8730 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8731 (__v16si) __B,
8732 (__mmask16) -1);
8733 }
8734
8735 extern __inline __mmask16
8736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8737 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8738 {
8739 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8740 (__v16si) __B, __U);
8741 }
8742
8743 extern __inline __mmask8
8744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8745 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8746 {
8747 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8748 (__v8di) __B, __U);
8749 }
8750
8751 extern __inline __mmask8
8752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8753 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8754 {
8755 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8756 (__v8di) __B,
8757 (__mmask8) -1);
8758 }
8759
8760 extern __inline __mmask16
8761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8763 {
8764 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8765 (__v16si) __B,
8766 (__mmask16) -1);
8767 }
8768
8769 extern __inline __mmask16
8770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8771 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8772 {
8773 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8774 (__v16si) __B, __U);
8775 }
8776
8777 extern __inline __mmask8
8778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8779 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8780 {
8781 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8782 (__v8di) __B, __U);
8783 }
8784
8785 extern __inline __mmask8
8786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8787 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8788 {
8789 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8790 (__v8di) __B,
8791 (__mmask8) -1);
8792 }
8793
8794 extern __inline __mmask16
8795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8796 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8797 {
8798 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8799 (__v16si) __Y, 5,
8800 (__mmask16) -1);
8801 }
8802
8803 extern __inline __mmask16
8804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8805 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8806 {
8807 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8808 (__v16si) __Y, 5,
8809 (__mmask16) -1);
8810 }
8811
8812 extern __inline __mmask8
8813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8814 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8815 {
8816 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8817 (__v8di) __Y, 5,
8818 (__mmask8) -1);
8819 }
8820
8821 extern __inline __mmask8
8822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8823 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8824 {
8825 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8826 (__v8di) __Y, 5,
8827 (__mmask8) -1);
8828 }
8829
8830 extern __inline __mmask16
8831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8832 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8833 {
8834 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8835 (__v16si) __Y, 2,
8836 (__mmask16) -1);
8837 }
8838
8839 extern __inline __mmask16
8840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8841 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8842 {
8843 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8844 (__v16si) __Y, 2,
8845 (__mmask16) -1);
8846 }
8847
8848 extern __inline __mmask8
8849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8850 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8851 {
8852 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8853 (__v8di) __Y, 2,
8854 (__mmask8) -1);
8855 }
8856
8857 extern __inline __mmask8
8858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8860 {
8861 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8862 (__v8di) __Y, 2,
8863 (__mmask8) -1);
8864 }
8865
8866 extern __inline __mmask16
8867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8868 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8869 {
8870 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8871 (__v16si) __Y, 1,
8872 (__mmask16) -1);
8873 }
8874
8875 extern __inline __mmask16
8876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8877 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8878 {
8879 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8880 (__v16si) __Y, 1,
8881 (__mmask16) -1);
8882 }
8883
8884 extern __inline __mmask8
8885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8886 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8887 {
8888 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8889 (__v8di) __Y, 1,
8890 (__mmask8) -1);
8891 }
8892
8893 extern __inline __mmask8
8894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8895 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8896 {
8897 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8898 (__v8di) __Y, 1,
8899 (__mmask8) -1);
8900 }
8901
8902 extern __inline __mmask16
8903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8904 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8905 {
8906 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8907 (__v16si) __Y, 4,
8908 (__mmask16) -1);
8909 }
8910
8911 extern __inline __mmask16
8912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8913 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8914 {
8915 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8916 (__v16si) __Y, 4,
8917 (__mmask16) -1);
8918 }
8919
8920 extern __inline __mmask8
8921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8922 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8923 {
8924 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8925 (__v8di) __Y, 4,
8926 (__mmask8) -1);
8927 }
8928
8929 extern __inline __mmask8
8930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8931 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8932 {
8933 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8934 (__v8di) __Y, 4,
8935 (__mmask8) -1);
8936 }
8937
8938 #define _MM_CMPINT_EQ 0x0
8939 #define _MM_CMPINT_LT 0x1
8940 #define _MM_CMPINT_LE 0x2
8941 #define _MM_CMPINT_UNUSED 0x3
8942 #define _MM_CMPINT_NE 0x4
8943 #define _MM_CMPINT_NLT 0x5
8944 #define _MM_CMPINT_GE 0x5
8945 #define _MM_CMPINT_NLE 0x6
8946 #define _MM_CMPINT_GT 0x6
8947
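/* Predicate values for the _mm512_[mask_]cmp_ep[iu]{32,64}_mask intrinsics
   below.  _MM_CMPINT_GE/_MM_CMPINT_NLT and _MM_CMPINT_GT/_MM_CMPINT_NLE name
   the same encodings; signed vs. unsigned interpretation comes from the
   intrinsic (epi vs. epu), not from the predicate.  E.g.

     __mmask16 __M = _mm512_cmp_epu32_mask (__A, __B, _MM_CMPINT_LE);

   sets bit i when __A[i] <= __B[i] as unsigned 32-bit integers.  */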
8948 #ifdef __OPTIMIZE__
8949 extern __inline __mmask8
8950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8952 {
8953 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8954 (__v8di) __Y, __P,
8955 (__mmask8) -1);
8956 }
8957
8958 extern __inline __mmask16
8959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8960 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8961 {
8962 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8963 (__v16si) __Y, __P,
8964 (__mmask16) -1);
8965 }
8966
8967 extern __inline __mmask8
8968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8969 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8970 {
8971 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8972 (__v8di) __Y, __P,
8973 (__mmask8) -1);
8974 }
8975
8976 extern __inline __mmask16
8977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8978 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
8979 {
8980 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8981 (__v16si) __Y, __P,
8982 (__mmask16) -1);
8983 }
8984
8985 extern __inline __mmask8
8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8987 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
8988 const int __R)
8989 {
8990 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
8991 (__v8df) __Y, __P,
8992 (__mmask8) -1, __R);
8993 }
8994
8995 extern __inline __mmask16
8996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8997 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
8998 {
8999 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9000 (__v16sf) __Y, __P,
9001 (__mmask16) -1, __R);
9002 }
9003
9004 extern __inline __mmask8
9005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9006 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9007 const int __P)
9008 {
9009 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9010 (__v8di) __Y, __P,
9011 (__mmask8) __U);
9012 }
9013
9014 extern __inline __mmask16
9015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9016 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9017 const int __P)
9018 {
9019 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9020 (__v16si) __Y, __P,
9021 (__mmask16) __U);
9022 }
9023
9024 extern __inline __mmask8
9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9027 const int __P)
9028 {
9029 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9030 (__v8di) __Y, __P,
9031 (__mmask8) __U);
9032 }
9033
9034 extern __inline __mmask16
9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9037 const int __P)
9038 {
9039 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9040 (__v16si) __Y, __P,
9041 (__mmask16) __U);
9042 }
9043
9044 extern __inline __mmask8
9045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9046 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9047 const int __P, const int __R)
9048 {
9049 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9050 (__v8df) __Y, __P,
9051 (__mmask8) __U, __R);
9052 }
9053
9054 extern __inline __mmask16
9055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9056 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9057 const int __P, const int __R)
9058 {
9059 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9060 (__v16sf) __Y, __P,
9061 (__mmask16) __U, __R);
9062 }
9063
9064 extern __inline __mmask8
9065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9067 {
9068 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9069 (__v2df) __Y, __P,
9070 (__mmask8) -1, __R);
9071 }
9072
9073 extern __inline __mmask8
9074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9075 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9076 const int __P, const int __R)
9077 {
9078 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9079 (__v2df) __Y, __P,
9080 (__mmask8) __M, __R);
9081 }
9082
9083 extern __inline __mmask8
9084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9085 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9086 {
9087 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9088 (__v4sf) __Y, __P,
9089 (__mmask8) -1, __R);
9090 }
9091
9092 extern __inline __mmask8
9093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9094 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9095 const int __P, const int __R)
9096 {
9097 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9098 (__v4sf) __Y, __P,
9099 (__mmask8) __M, __R);
9100 }
9101
9102 #else
9103 #define _mm512_cmp_epi64_mask(X, Y, P) \
9104 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9105 (__v8di)(__m512i)(Y), (int)(P),\
9106 (__mmask8)-1))
9107
9108 #define _mm512_cmp_epi32_mask(X, Y, P) \
9109 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9110 (__v16si)(__m512i)(Y), (int)(P),\
9111 (__mmask16)-1))
9112
9113 #define _mm512_cmp_epu64_mask(X, Y, P) \
9114 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9115 (__v8di)(__m512i)(Y), (int)(P),\
9116 (__mmask8)-1))
9117
9118 #define _mm512_cmp_epu32_mask(X, Y, P) \
9119 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9120 (__v16si)(__m512i)(Y), (int)(P),\
9121 (__mmask16)-1))
9122
9123 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9124 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9125 (__v8df)(__m512d)(Y), (int)(P),\
9126 (__mmask8)-1, R))
9127
9128 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9129 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9130 (__v16sf)(__m512)(Y), (int)(P),\
9131 (__mmask16)-1, R))
9132
9133 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9134 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9135 (__v8di)(__m512i)(Y), (int)(P),\
9136 (__mmask8)M))
9137
9138 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9139 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9140 (__v16si)(__m512i)(Y), (int)(P),\
9141 (__mmask16)M))
9142
9143 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9144 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9145 (__v8di)(__m512i)(Y), (int)(P),\
9146 (__mmask8)M))
9147
9148 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9149 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9150 (__v16si)(__m512i)(Y), (int)(P),\
9151 (__mmask16)M))
9152
9153 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9154 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9155 (__v8df)(__m512d)(Y), (int)(P),\
9156 (__mmask8)M, R))
9157
9158 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9159 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9160 (__v16sf)(__m512)(Y), (int)(P),\
9161 (__mmask16)M, R))
9162
9163 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9164 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9165 (__v2df)(__m128d)(Y), (int)(P),\
9166 (__mmask8)-1, R))
9167
9168 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9169 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9170 (__v2df)(__m128d)(Y), (int)(P),\
9171 (__mmask8)(M), R))
9172
9173 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9174 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9175 (__v4sf)(__m128)(Y), (int)(P), \
9176 (__mmask8)-1, R))
9177
9178 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9179 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9180 (__v4sf)(__m128)(Y), (int)(P), \
9181 (__mmask8)(M), R))
9182 #endif
9183
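/* Gathers and scatters.  Element addresses are computed as
   __addr + __index[i] * __scale, where __scale must be 1, 2, 4 or 8.  For
   the masked gathers, elements whose mask bit is clear are copied from the
   old-value operand; the masked scatters skip the corresponding stores.
   E.g., with __idx an __m512i of indices and __base a float array,

     __m512 __v = _mm512_i32gather_ps (__idx, __base, 4);

   loads __v[i] from __base[__idx[i]].  */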
9184 #ifdef __OPTIMIZE__
9185 extern __inline __m512
9186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9187 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9188 {
9189 __m512 v1_old = _mm512_setzero_ps ();
9190 __mmask16 mask = 0xFFFF;
9191
9192 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9193 __addr,
9194 (__v16si) __index,
9195 mask, __scale);
9196 }
9197
9198 extern __inline __m512
9199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9200 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9201 __m512i __index, float const *__addr, int __scale)
9202 {
9203 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9204 __addr,
9205 (__v16si) __index,
9206 __mask, __scale);
9207 }
9208
9209 extern __inline __m512d
9210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9212 {
9213 __m512d v1_old = _mm512_setzero_pd ();
9214 __mmask8 mask = 0xFF;
9215
9216 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9217 __addr,
9218 (__v8si) __index, mask,
9219 __scale);
9220 }
9221
9222 extern __inline __m512d
9223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9225 __m256i __index, double const *__addr, int __scale)
9226 {
9227 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9228 __addr,
9229 (__v8si) __index,
9230 __mask, __scale);
9231 }
9232
9233 extern __inline __m256
9234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9236 {
9237 __m256 v1_old = _mm256_setzero_ps ();
9238 __mmask8 mask = 0xFF;
9239
9240 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9241 __addr,
9242 (__v8di) __index, mask,
9243 __scale);
9244 }
9245
9246 extern __inline __m256
9247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9249 __m512i __index, float const *__addr, int __scale)
9250 {
9251 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9252 __addr,
9253 (__v8di) __index,
9254 __mask, __scale);
9255 }
9256
9257 extern __inline __m512d
9258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9260 {
9261 __m512d v1_old = _mm512_setzero_pd ();
9262 __mmask8 mask = 0xFF;
9263
9264 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9265 __addr,
9266 (__v8di) __index, mask,
9267 __scale);
9268 }
9269
9270 extern __inline __m512d
9271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9272 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9273 __m512i __index, double const *__addr, int __scale)
9274 {
9275 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9276 __addr,
9277 (__v8di) __index,
9278 __mask, __scale);
9279 }
9280
9281 extern __inline __m512i
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9284 {
9285 __m512i v1_old = _mm512_setzero_si512 ();
9286 __mmask16 mask = 0xFFFF;
9287
9288 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9289 __addr,
9290 (__v16si) __index,
9291 mask, __scale);
9292 }
9293
9294 extern __inline __m512i
9295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9297 __m512i __index, int const *__addr, int __scale)
9298 {
9299 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9300 __addr,
9301 (__v16si) __index,
9302 __mask, __scale);
9303 }
9304
9305 extern __inline __m512i
9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9308 {
9309 __m512i v1_old = _mm512_setzero_si512 ();
9310 __mmask8 mask = 0xFF;
9311
9312 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9313 __addr,
9314 (__v8si) __index, mask,
9315 __scale);
9316 }
9317
9318 extern __inline __m512i
9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9321 __m256i __index, long long const *__addr,
9322 int __scale)
9323 {
9324 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9325 __addr,
9326 (__v8si) __index,
9327 __mask, __scale);
9328 }
9329
9330 extern __inline __m256i
9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9333 {
9334 __m256i v1_old = _mm256_setzero_si256 ();
9335 __mmask8 mask = 0xFF;
9336
9337 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9338 __addr,
9339 (__v8di) __index,
9340 mask, __scale);
9341 }
9342
9343 extern __inline __m256i
9344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9345 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9346 __m512i __index, int const *__addr, int __scale)
9347 {
9348 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9349 __addr,
9350 (__v8di) __index,
9351 __mask, __scale);
9352 }
9353
9354 extern __inline __m512i
9355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9357 {
9358 __m512i v1_old = _mm512_setzero_si512 ();
9359 __mmask8 mask = 0xFF;
9360
9361 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9362 __addr,
9363 (__v8di) __index, mask,
9364 __scale);
9365 }
9366
9367 extern __inline __m512i
9368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9370 __m512i __index, long long const *__addr,
9371 int __scale)
9372 {
9373 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9374 __addr,
9375 (__v8di) __index,
9376 __mask, __scale);
9377 }
9378
9379 extern __inline void
9380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9381 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9382 {
9383 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9384 (__v16si) __index, (__v16sf) __v1, __scale);
9385 }
9386
9387 extern __inline void
9388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9389 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9390 __m512i __index, __m512 __v1, int __scale)
9391 {
9392 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9393 (__v16sf) __v1, __scale);
9394 }
9395
9396 extern __inline void
9397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9398 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9399 int __scale)
9400 {
9401 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9402 (__v8si) __index, (__v8df) __v1, __scale);
9403 }
9404
9405 extern __inline void
9406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9407 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9408 __m256i __index, __m512d __v1, int __scale)
9409 {
9410 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9411 (__v8df) __v1, __scale);
9412 }
9413
9414 extern __inline void
9415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9416 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9417 {
9418 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9419 (__v8di) __index, (__v8sf) __v1, __scale);
9420 }
9421
9422 extern __inline void
9423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9424 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9425 __m512i __index, __m256 __v1, int __scale)
9426 {
9427 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9428 (__v8sf) __v1, __scale);
9429 }
9430
9431 extern __inline void
9432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9433 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9434 int __scale)
9435 {
9436 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9437 (__v8di) __index, (__v8df) __v1, __scale);
9438 }
9439
9440 extern __inline void
9441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9442 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9443 __m512i __index, __m512d __v1, int __scale)
9444 {
9445 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9446 (__v8df) __v1, __scale);
9447 }
9448
9449 extern __inline void
9450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9451 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9452 __m512i __v1, int __scale)
9453 {
9454 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9455 (__v16si) __index, (__v16si) __v1, __scale);
9456 }
9457
9458 extern __inline void
9459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9460 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9461 __m512i __index, __m512i __v1, int __scale)
9462 {
9463 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9464 (__v16si) __v1, __scale);
9465 }
9466
9467 extern __inline void
9468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9469 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9470 __m512i __v1, int __scale)
9471 {
9472 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9473 (__v8si) __index, (__v8di) __v1, __scale);
9474 }
9475
9476 extern __inline void
9477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9478 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9479 __m256i __index, __m512i __v1, int __scale)
9480 {
9481 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9482 (__v8di) __v1, __scale);
9483 }
9484
9485 extern __inline void
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9488 __m256i __v1, int __scale)
9489 {
9490 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9491 (__v8di) __index, (__v8si) __v1, __scale);
9492 }
9493
9494 extern __inline void
9495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9496 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9497 __m512i __index, __m256i __v1, int __scale)
9498 {
9499 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9500 (__v8si) __v1, __scale);
9501 }
9502
9503 extern __inline void
9504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9505 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9506 __m512i __v1, int __scale)
9507 {
9508 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9509 (__v8di) __index, (__v8di) __v1, __scale);
9510 }
9511
9512 extern __inline void
9513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9514 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9515 __m512i __index, __m512i __v1, int __scale)
9516 {
9517 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9518 (__v8di) __v1, __scale);
9519 }
9520 #else
9521 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9522 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \
9523 (float const *)ADDR, \
9524 (__v16si)(__m512i)INDEX, \
9525 (__mmask16)0xFFFF, (int)SCALE)
9526
9527 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9528 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9529 (float const *)ADDR, \
9530 (__v16si)(__m512i)INDEX, \
9531 (__mmask16)MASK, (int)SCALE)
9532
9533 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9534 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \
9535 (double const *)ADDR, \
9536 (__v8si)(__m256i)INDEX, \
9537 (__mmask8)0xFF, (int)SCALE)
9538
9539 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9540 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9541 (double const *)ADDR, \
9542 (__v8si)(__m256i)INDEX, \
9543 (__mmask8)MASK, (int)SCALE)
9544
9545 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9546 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \
9547 (float const *)ADDR, \
9548 (__v8di)(__m512i)INDEX, \
9549 (__mmask8)0xFF, (int)SCALE)
9550
9551 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9552 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9553 (float const *)ADDR, \
9554 (__v8di)(__m512i)INDEX, \
9555 (__mmask8)MASK, (int)SCALE)
9556
9557 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9558 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \
9559 (double const *)ADDR, \
9560 (__v8di)(__m512i)INDEX, \
9561 (__mmask8)0xFF, (int)SCALE)
9562
9563 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9564 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9565 (double const *)ADDR, \
9566 (__v8di)(__m512i)INDEX, \
9567 (__mmask8)MASK, (int)SCALE)
9568
9569 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9570 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \
9571 (int const *)ADDR, \
9572 (__v16si)(__m512i)INDEX, \
9573 (__mmask16)0xFFFF, (int)SCALE)
9574
9575 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9576 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9577 (int const *)ADDR, \
9578 (__v16si)(__m512i)INDEX, \
9579 (__mmask16)MASK, (int)SCALE)
9580
9581 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9582 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \
9583 (long long const *)ADDR, \
9584 (__v8si)(__m256i)INDEX, \
9585 (__mmask8)0xFF, (int)SCALE)
9586
9587 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9588 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9589 (long long const *)ADDR, \
9590 (__v8si)(__m256i)INDEX, \
9591 (__mmask8)MASK, (int)SCALE)
9592
9593 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9594 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \
9595 (int const *)ADDR, \
9596 (__v8di)(__m512i)INDEX, \
9597 (__mmask8)0xFF, (int)SCALE)
9598
9599 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9600 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
9601 (int const *)ADDR, \
9602 (__v8di)(__m512i)INDEX, \
9603 (__mmask8)MASK, (int)SCALE)
9604
9605 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
9606 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \
9607 (long long const *)ADDR, \
9608 (__v8di)(__m512i)INDEX, \
9609 (__mmask8)0xFF, (int)SCALE)
9610
9611 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9612 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
9613 (long long const *)ADDR, \
9614 (__v8di)(__m512i)INDEX, \
9615 (__mmask8)MASK, (int)SCALE)
9616
9617 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
9618 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \
9619 (__v16si)(__m512i)INDEX, \
9620 (__v16sf)(__m512)V1, (int)SCALE)
9621
9622 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9623 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \
9624 (__v16si)(__m512i)INDEX, \
9625 (__v16sf)(__m512)V1, (int)SCALE)
9626
9627 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
9628 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \
9629 (__v8si)(__m256i)INDEX, \
9630 (__v8df)(__m512d)V1, (int)SCALE)
9631
9632 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9633 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \
9634 (__v8si)(__m256i)INDEX, \
9635 (__v8df)(__m512d)V1, (int)SCALE)
9636
9637 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
9638 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \
9639 (__v8di)(__m512i)INDEX, \
9640 (__v8sf)(__m256)V1, (int)SCALE)
9641
9642 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9643 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \
9644 (__v8di)(__m512i)INDEX, \
9645 (__v8sf)(__m256)V1, (int)SCALE)
9646
9647 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
9648 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \
9649 (__v8di)(__m512i)INDEX, \
9650 (__v8df)(__m512d)V1, (int)SCALE)
9651
9652 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9653 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \
9654 (__v8di)(__m512i)INDEX, \
9655 (__v8df)(__m512d)V1, (int)SCALE)
9656
9657 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
9658 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \
9659 (__v16si)(__m512i)INDEX, \
9660 (__v16si)(__m512i)V1, (int)SCALE)
9661
9662 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9663 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \
9664 (__v16si)(__m512i)INDEX, \
9665 (__v16si)(__m512i)V1, (int)SCALE)
9666
9667 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
9668 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9669 (__v8si)(__m256i)INDEX, \
9670 (__v8di)(__m512i)V1, (int)SCALE)
9671
9672 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9673 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \
9674 (__v8si)(__m256i)INDEX, \
9675 (__v8di)(__m512i)V1, (int)SCALE)
9676
9677 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
9678 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \
9679 (__v8di)(__m512i)INDEX, \
9680 (__v8si)(__m256i)V1, (int)SCALE)
9681
9682 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9683 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \
9684 (__v8di)(__m512i)INDEX, \
9685 (__v8si)(__m256i)V1, (int)SCALE)
9686
9687 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
9688 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9689 (__v8di)(__m512i)INDEX, \
9690 (__v8di)(__m512i)V1, (int)SCALE)
9691
9692 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9693 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \
9694 (__v8di)(__m512i)INDEX, \
9695 (__v8di)(__m512i)V1, (int)SCALE)
9696 #endif
9697
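/* Compress and expand.  The compress intrinsics pack the elements selected
   by the mask contiguously into the low part of the destination, filling
   the remainder from __W (or with zeros for the maskz forms), while
   _mm512_mask_compressstoreu_* stores only the selected elements,
   contiguously, to unaligned memory.  The expand intrinsics further below
   perform the inverse operation.  */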
9698 extern __inline __m512d
9699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9700 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9701 {
9702 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9703 (__v8df) __W,
9704 (__mmask8) __U);
9705 }
9706
9707 extern __inline __m512d
9708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9709 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9710 {
9711 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9712 (__v8df)
9713 _mm512_setzero_pd (),
9714 (__mmask8) __U);
9715 }
9716
9717 extern __inline void
9718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9719 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9720 {
9721 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9722 (__mmask8) __U);
9723 }
9724
9725 extern __inline __m512
9726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9727 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9728 {
9729 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9730 (__v16sf) __W,
9731 (__mmask16) __U);
9732 }
9733
9734 extern __inline __m512
9735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9736 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9737 {
9738 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9739 (__v16sf)
9740 _mm512_setzero_ps (),
9741 (__mmask16) __U);
9742 }
9743
9744 extern __inline void
9745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9746 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9747 {
9748 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9749 (__mmask16) __U);
9750 }
9751
9752 extern __inline __m512i
9753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9754 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9755 {
9756 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9757 (__v8di) __W,
9758 (__mmask8) __U);
9759 }
9760
9761 extern __inline __m512i
9762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9763 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9764 {
9765 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9766 (__v8di)
9767 _mm512_setzero_si512 (),
9768 (__mmask8) __U);
9769 }
9770
9771 extern __inline void
9772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9773 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9774 {
9775 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9776 (__mmask8) __U);
9777 }
9778
9779 extern __inline __m512i
9780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9781 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9782 {
9783 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9784 (__v16si) __W,
9785 (__mmask16) __U);
9786 }
9787
9788 extern __inline __m512i
9789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9790 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9791 {
9792 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9793 (__v16si)
9794 _mm512_setzero_si512 (),
9795 (__mmask16) __U);
9796 }
9797
9798 extern __inline void
9799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9800 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9801 {
9802 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9803 (__mmask16) __U);
9804 }
9805
9806 extern __inline __m512d
9807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9808 _mm512_expand_pd (__m512d __A)
9809 {
9810 return (__m512d) __builtin_ia32_expanddf512 ((__v8df) __A);
9811 }
9812
9813 extern __inline __m512d
9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9816 {
9817 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9818 (__v8df) __W,
9819 (__mmask8) __U);
9820 }
9821
9822 extern __inline __m512d
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9825 {
9826 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9827 (__v8df)
9828 _mm512_setzero_pd (),
9829 (__mmask8) __U);
9830 }
9831
9832 extern __inline __m512d
9833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9834 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9835 {
9836 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9837 (__v8df) __W,
9838 (__mmask8) __U);
9839 }
9840
9841 extern __inline __m512d
9842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9843 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9844 {
9845 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9846 (__v8df)
9847 _mm512_setzero_pd (),
9848 (__mmask8) __U);
9849 }
9850
9851 extern __inline __m512
9852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9853 _mm512_expand_ps (__m512 __A)
9854 {
9855 return (__m512) __builtin_ia32_expandsf512 ((__v16sf) __A);
9856 }
9857
9858 extern __inline __m512
9859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9861 {
9862 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9863 (__v16sf) __W,
9864 (__mmask16) __U);
9865 }
9866
9867 extern __inline __m512
9868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9869 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9870 {
9871 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9872 (__v16sf)
9873 _mm512_setzero_ps (),
9874 (__mmask16) __U);
9875 }
9876
9877 extern __inline __m512
9878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9880 {
9881 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9882 (__v16sf) __W,
9883 (__mmask16) __U);
9884 }
9885
9886 extern __inline __m512
9887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9888 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9889 {
9890 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9891 (__v16sf)
9892 _mm512_setzero_ps (),
9893 (__mmask16) __U);
9894 }
9895
9896 extern __inline __m512i
9897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9899 {
9900 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9901 (__v8di) __W,
9902 (__mmask8) __U);
9903 }
9904
9905 extern __inline __m512i
9906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9907 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9908 {
9909 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9910 (__v8di)
9911 _mm512_setzero_si512 (),
9912 (__mmask8) __U);
9913 }
9914
9915 extern __inline __m512i
9916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9918 {
9919 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9920 (__v8di) __W,
9921 (__mmask8) __U);
9922 }
9923
9924 extern __inline __m512i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9927 {
9928 return (__m512i)
9929 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9930 (__v8di)
9931 _mm512_setzero_si512 (),
9932 (__mmask8) __U);
9933 }
9934
9935 extern __inline __m512i
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9938 {
9939 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9940 (__v16si) __W,
9941 (__mmask16) __U);
9942 }
9943
9944 extern __inline __m512i
9945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9946 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9947 {
9948 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9949 (__v16si)
9950 _mm512_setzero_si512 (),
9951 (__mmask16) __U);
9952 }
9953
9954 extern __inline __m512i
9955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9957 {
9958 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9959 (__v16si) __W,
9960 (__mmask16) __U);
9961 }
9962
9963 extern __inline __m512i
9964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9966 {
9967 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9968 (__v16si)
9969 _mm512_setzero_si512
9970 (), (__mmask16) __U);
9971 }
9972
9973 /* Mask arithmetic operations */
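/* These combine 16-bit opmask values; e.g. two comparison results can be
   merged before being used as a write mask:

     __mmask16 __M = _mm512_kand (_mm512_cmplt_epi32_mask (__A, __B),
                                  _mm512_cmpneq_epi32_mask (__A, __C));

   _mm512_kortestz returns nonzero iff (__A | __B) == 0, and
   _mm512_kortestc returns nonzero iff (__A | __B) == 0xFFFF.  */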
9974 extern __inline __mmask16
9975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9976 _mm512_kand (__mmask16 __A, __mmask16 __B)
9977 {
9978 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9979 }
9980
9981 extern __inline __mmask16
9982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9983 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9984 {
9985 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9986 }
9987
9988 extern __inline __mmask16
9989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9990 _mm512_kor (__mmask16 __A, __mmask16 __B)
9991 {
9992 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
9993 }
9994
9995 extern __inline int
9996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9997 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
9998 {
9999 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10000 (__mmask16) __B);
10001 }
10002
10003 extern __inline int
10004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10005 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10006 {
10007 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10008 (__mmask16) __B);
10009 }
10010
10011 extern __inline __mmask16
10012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10013 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10014 {
10015 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10016 }
10017
10018 extern __inline __mmask16
10019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10020 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10021 {
10022 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10023 }
10024
10025 extern __inline __mmask16
10026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10027 _mm512_knot (__mmask16 __A)
10028 {
10029 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10030 }
10031
10032 extern __inline __mmask16
10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10035 {
10036 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10037 }
10038
10039 #ifdef __OPTIMIZE__
10040 extern __inline __m512i
10041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10042 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10043 const int __imm)
10044 {
10045 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10046 (__v4si) __D,
10047 __imm,
10048 (__v16si)
10049 _mm512_setzero_si512 (),
10050 __B);
10051 }
10052
10053 extern __inline __m512
10054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10055 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10056 const int __imm)
10057 {
10058 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10059 (__v4sf) __D,
10060 __imm,
10061 (__v16sf)
10062 _mm512_setzero_ps (), __B);
10063 }
10064
10065 extern __inline __m512i
10066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10067 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10068 __m128i __D, const int __imm)
10069 {
10070 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10071 (__v4si) __D,
10072 __imm,
10073 (__v16si) __A,
10074 __B);
10075 }
10076
10077 extern __inline __m512
10078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10079 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10080 __m128 __D, const int __imm)
10081 {
10082 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10083 (__v4sf) __D,
10084 __imm,
10085 (__v16sf) __A, __B);
10086 }
10087 #else
10088 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10089 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10090 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10091 (__mmask16)(A)))
10092
10093 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10094 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10095 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10096 (__mmask16)(A)))
10097
10098 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10099 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10100 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10101 (__mmask16)(B)))
10102
10103 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10104 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10105 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10106 (__mmask16)(B)))
10107 #endif
10108
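/* Usage sketch (illustration only): _mm512_mask_inserti32x4 replaces one
   128-bit lane of the 512-bit source and then merges the result under a
   16-bit write mask.  _mm512_set1_epi32 is defined elsewhere in this header
   and _mm_set1_epi32 comes from the SSE headers that <immintrin.h>
   includes first.

     __m512i dst = _mm512_set1_epi32 (0);
     __m128i src = _mm_set1_epi32 (7);
     // Write lane 1 (elements 4..7); the mask 0x00f0 selects exactly those
     // elements, so the rest of dst is left unchanged.
     dst = _mm512_mask_inserti32x4 (dst, (__mmask16) 0x00f0, dst, src, 1);
*/
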
10109 extern __inline __m512i
10110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10111 _mm512_max_epi64 (__m512i __A, __m512i __B)
10112 {
10113 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10114 (__v8di) __B,
10115 (__v8di)
10116 _mm512_setzero_si512 (),
10117 (__mmask8) -1);
10118 }
10119
10120 extern __inline __m512i
10121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10122 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10123 {
10124 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10125 (__v8di) __B,
10126 (__v8di)
10127 _mm512_setzero_si512 (),
10128 __M);
10129 }
10130
10131 extern __inline __m512i
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10134 {
10135 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10136 (__v8di) __B,
10137 (__v8di) __W, __M);
10138 }
10139
10140 extern __inline __m512i
10141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142 _mm512_min_epi64 (__m512i __A, __m512i __B)
10143 {
10144 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10145 (__v8di) __B,
10146 (__v8di)
10147 _mm512_setzero_si512 (),
10148 (__mmask8) -1);
10149 }
10150
10151 extern __inline __m512i
10152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10153 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10154 {
10155 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10156 (__v8di) __B,
10157 (__v8di) __W, __M);
10158 }
10159
10160 extern __inline __m512i
10161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10162 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10163 {
10164 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10165 (__v8di) __B,
10166 (__v8di)
10167 _mm512_setzero_si512 (),
10168 __M);
10169 }
10170
10171 extern __inline __m512i
10172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10173 _mm512_max_epu64 (__m512i __A, __m512i __B)
10174 {
10175 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10176 (__v8di) __B,
10177 (__v8di)
10178 _mm512_setzero_si512 (),
10179 (__mmask8) -1);
10180 }
10181
10182 extern __inline __m512i
10183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10184 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10185 {
10186 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10187 (__v8di) __B,
10188 (__v8di)
10189 _mm512_setzero_si512 (),
10190 __M);
10191 }
10192
10193 extern __inline __m512i
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10196 {
10197 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10198 (__v8di) __B,
10199 (__v8di) __W, __M);
10200 }
10201
10202 extern __inline __m512i
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm512_min_epu64 (__m512i __A, __m512i __B)
10205 {
10206 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10207 (__v8di) __B,
10208 (__v8di)
10209 _mm512_setzero_si512 (),
10210 (__mmask8) -1);
10211 }
10212
10213 extern __inline __m512i
10214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10215 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10216 {
10217 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10218 (__v8di) __B,
10219 (__v8di) __W, __M);
10220 }
10221
10222 extern __inline __m512i
10223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10224 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10225 {
10226 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10227 (__v8di) __B,
10228 (__v8di)
10229 _mm512_setzero_si512 (),
10230 __M);
10231 }
10232
10233 extern __inline __m512i
10234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10235 _mm512_max_epi32 (__m512i __A, __m512i __B)
10236 {
10237 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10238 (__v16si) __B,
10239 (__v16si)
10240 _mm512_setzero_si512 (),
10241 (__mmask16) -1);
10242 }
10243
10244 extern __inline __m512i
10245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10246 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10247 {
10248 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10249 (__v16si) __B,
10250 (__v16si)
10251 _mm512_setzero_si512 (),
10252 __M);
10253 }
10254
10255 extern __inline __m512i
10256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10257 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10258 {
10259 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10260 (__v16si) __B,
10261 (__v16si) __W, __M);
10262 }
10263
10264 extern __inline __m512i
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm512_min_epi32 (__m512i __A, __m512i __B)
10267 {
10268 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10269 (__v16si) __B,
10270 (__v16si)
10271 _mm512_setzero_si512 (),
10272 (__mmask16) -1);
10273 }
10274
10275 extern __inline __m512i
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10278 {
10279 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10280 (__v16si) __B,
10281 (__v16si)
10282 _mm512_setzero_si512 (),
10283 __M);
10284 }
10285
10286 extern __inline __m512i
10287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10288 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10289 {
10290 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10291 (__v16si) __B,
10292 (__v16si) __W, __M);
10293 }
10294
10295 extern __inline __m512i
10296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10297 _mm512_max_epu32 (__m512i __A, __m512i __B)
10298 {
10299 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10300 (__v16si) __B,
10301 (__v16si)
10302 _mm512_setzero_si512 (),
10303 (__mmask16) -1);
10304 }
10305
10306 extern __inline __m512i
10307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10308 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10309 {
10310 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10311 (__v16si) __B,
10312 (__v16si)
10313 _mm512_setzero_si512 (),
10314 __M);
10315 }
10316
10317 extern __inline __m512i
10318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10319 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10320 {
10321 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10322 (__v16si) __B,
10323 (__v16si) __W, __M);
10324 }
10325
10326 extern __inline __m512i
10327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328 _mm512_min_epu32 (__m512i __A, __m512i __B)
10329 {
10330 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10331 (__v16si) __B,
10332 (__v16si)
10333 _mm512_setzero_si512 (),
10334 (__mmask16) -1);
10335 }
10336
10337 extern __inline __m512i
10338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10340 {
10341 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10342 (__v16si) __B,
10343 (__v16si)
10344 _mm512_setzero_si512 (),
10345 __M);
10346 }
10347
10348 extern __inline __m512i
10349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10350 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10351 {
10352 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10353 (__v16si) __B,
10354 (__v16si) __W, __M);
10355 }
10356
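/* Usage sketch (illustration only): a clamp to [0, 255] built from the
   packed min/max intrinsics above, plus a zero-masked variant that clears
   the odd lanes.  _mm512_set1_epi32 is defined elsewhere in this header.

     __m512i v  = _mm512_set1_epi32 (300);
     __m512i lo = _mm512_setzero_si512 ();
     __m512i hi = _mm512_set1_epi32 (255);
     __m512i clamped = _mm512_min_epi32 (_mm512_max_epi32 (v, lo), hi);    // 255 in every lane
     __m512i evens   = _mm512_maskz_max_epi32 ((__mmask16) 0x5555, v, lo); // odd lanes zeroed
*/
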
10357 extern __inline __m512
10358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10359 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10360 {
10361 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10362 (__v16sf) __B,
10363 (__v16sf)
10364 _mm512_setzero_ps (),
10365 (__mmask16) -1);
10366 }
10367
10368 extern __inline __m512
10369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10370 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10371 {
10372 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10373 (__v16sf) __B,
10374 (__v16sf) __W,
10375 (__mmask16) __U);
10376 }
10377
10378 extern __inline __m512
10379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10381 {
10382 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383 (__v16sf) __B,
10384 (__v16sf)
10385 _mm512_setzero_ps (),
10386 (__mmask16) __U);
10387 }
10388
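/* Usage sketch (illustration only): as with the 128- and 256-bit forms, the
   interleave works within each 128-bit lane rather than across the whole
   register.  _mm512_set1_ps is defined elsewhere in this header.

     __m512 a = _mm512_set1_ps (1.0f);
     __m512 b = _mm512_set1_ps (2.0f);
     __m512 lo = _mm512_unpacklo_ps (a, b);
     // per 128-bit lane the result is { a0, b0, a1, b1 }
*/
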
10389 #ifdef __OPTIMIZE__
10390 extern __inline __m128d
10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10393 {
10394 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10395 (__v2df) __B,
10396 __R);
10397 }
10398
10399 extern __inline __m128
10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10402 {
10403 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10404 (__v4sf) __B,
10405 __R);
10406 }
10407
10408 extern __inline __m128d
10409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10411 {
10412 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10413 (__v2df) __B,
10414 __R);
10415 }
10416
10417 extern __inline __m128
10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10420 {
10421 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10422 (__v4sf) __B,
10423 __R);
10424 }
10425
10426 #else
10427 #define _mm_max_round_sd(A, B, C) \
10428 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10429
10430 #define _mm_max_round_ss(A, B, C) \
10431 (__m128)__builtin_ia32_maxss_round(A, B, C)
10432
10433 #define _mm_min_round_sd(A, B, C) \
10434 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10435
10436 #define _mm_min_round_ss(A, B, C) \
10437 (__m128)__builtin_ia32_minss_round(A, B, C)
10438 #endif
10439
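/* Usage sketch (illustration only): the *_round_sd/ss forms take a
   rounding-control operand that must be a compile-time constant, which is
   why the macro form above is used when the file is compiled without
   optimization.  _mm_set_sd comes from <emmintrin.h>.

     __m128d x = _mm_set_sd (1.5);
     __m128d y = _mm_set_sd (2.5);
     __m128d m = _mm_max_round_sd (x, y, _MM_FROUND_CUR_DIRECTION);   // low element: 2.5
*/
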
10440 extern __inline __m512d
10441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10442 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10443 {
10444 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10445 (__v8df) __W,
10446 (__mmask8) __U);
10447 }
10448
10449 extern __inline __m512
10450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10452 {
10453 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10454 (__v16sf) __W,
10455 (__mmask16) __U);
10456 }
10457
10458 extern __inline __m512i
10459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10460 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10461 {
10462 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10463 (__v8di) __W,
10464 (__mmask8) __U);
10465 }
10466
10467 extern __inline __m512i
10468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10469 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10470 {
10471 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10472 (__v16si) __W,
10473 (__mmask16) __U);
10474 }
10475
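/* Usage sketch (illustration only): each set bit in the mask selects the
   element from the second vector operand (__W above); clear bits keep the
   element from the first.  _mm512_set1_ps is defined elsewhere in this
   header.

     __m512 a = _mm512_set1_ps (1.0f);
     __m512 b = _mm512_set1_ps (2.0f);
     __m512 r = _mm512_mask_blend_ps ((__mmask16) 0xff00, a, b);
     // elements 0..7 come from a, elements 8..15 from b
*/
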
10476 #ifdef __OPTIMIZE__
10477 extern __inline __m128d
10478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10480 {
10481 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10482 (__v2df) __A,
10483 (__v2df) __B,
10484 __R);
10485 }
10486
10487 extern __inline __m128
10488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10490 {
10491 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10492 (__v4sf) __A,
10493 (__v4sf) __B,
10494 __R);
10495 }
10496
10497 extern __inline __m128d
10498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10500 {
10501 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10502 (__v2df) __A,
10503 -(__v2df) __B,
10504 __R);
10505 }
10506
10507 extern __inline __m128
10508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10510 {
10511 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10512 (__v4sf) __A,
10513 -(__v4sf) __B,
10514 __R);
10515 }
10516
10517 extern __inline __m128d
10518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10520 {
10521 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10522 -(__v2df) __A,
10523 (__v2df) __B,
10524 __R);
10525 }
10526
10527 extern __inline __m128
10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10530 {
10531 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10532 -(__v4sf) __A,
10533 (__v4sf) __B,
10534 __R);
10535 }
10536
10537 extern __inline __m128d
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10540 {
10541 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10542 -(__v2df) __A,
10543 -(__v2df) __B,
10544 __R);
10545 }
10546
10547 extern __inline __m128
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10550 {
10551 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10552 -(__v4sf) __A,
10553 -(__v4sf) __B,
10554 __R);
10555 }
10556 #else
10557 #define _mm_fmadd_round_sd(A, B, C, R) \
10558 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
10559
10560 #define _mm_fmadd_round_ss(A, B, C, R) \
10561 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
10562
10563 #define _mm_fmsub_round_sd(A, B, C, R) \
10564 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
10565
10566 #define _mm_fmsub_round_ss(A, B, C, R) \
10567 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
10568
10569 #define _mm_fnmadd_round_sd(A, B, C, R) \
10570 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
10571
10572 #define _mm_fnmadd_round_ss(A, B, C, R) \
10573 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
10574
10575 #define _mm_fnmsub_round_sd(A, B, C, R) \
10576 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
10577
10578 #define _mm_fnmsub_round_ss(A, B, C, R) \
10579 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
10580 #endif
10581
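/* Usage sketch (illustration only): the scalar fused forms operate on the
   low element and copy the upper element of the first operand through.
   _mm_set_sd comes from <emmintrin.h>.

     __m128d w = _mm_set_sd (1.0);
     __m128d a = _mm_set_sd (2.0);
     __m128d b = _mm_set_sd (3.0);
     __m128d r = _mm_fmadd_round_sd (w, a, b, _MM_FROUND_CUR_DIRECTION);
     // low element: 1.0 * 2.0 + 3.0 = 5.0, computed with a single rounding
*/
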
10582 #ifdef __OPTIMIZE__
10583 extern __inline int
10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10586 {
10587 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10588 }
10589
10590 extern __inline int
10591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10592 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10593 {
10594 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10595 }
10596 #else
10597 #define _mm_comi_round_ss(A, B, C, D)\
10598 __builtin_ia32_vcomiss(A, B, C, D)
10599 #define _mm_comi_round_sd(A, B, C, D)\
10600 __builtin_ia32_vcomisd(A, B, C, D)
10601 #endif
10602
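/* Usage sketch (illustration only): _mm_comi_round_ss returns 1 when the
   predicate holds for the low elements and 0 otherwise.  The _CMP_*
   predicates are assumed to come from <avxintrin.h>, which <immintrin.h>
   includes before this header; _mm_set_ss is from <xmmintrin.h>.

     int lt = _mm_comi_round_ss (_mm_set_ss (1.0f), _mm_set_ss (2.0f),
                                 _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);   // 1
*/
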
10603 extern __inline __m512d
10604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10605 _mm512_sqrt_pd (__m512d __A)
10606 {
10607 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10608 (__v8df)
10609 _mm512_setzero_pd (),
10610 (__mmask8) -1,
10611 _MM_FROUND_CUR_DIRECTION);
10612 }
10613
10614 extern __inline __m512d
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10617 {
10618 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10619 (__v8df) __W,
10620 (__mmask8) __U,
10621 _MM_FROUND_CUR_DIRECTION);
10622 }
10623
10624 extern __inline __m512d
10625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10627 {
10628 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629 (__v8df)
10630 _mm512_setzero_pd (),
10631 (__mmask8) __U,
10632 _MM_FROUND_CUR_DIRECTION);
10633 }
10634
10635 extern __inline __m512
10636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10637 _mm512_sqrt_ps (__m512 __A)
10638 {
10639 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10640 (__v16sf)
10641 _mm512_setzero_ps (),
10642 (__mmask16) -1,
10643 _MM_FROUND_CUR_DIRECTION);
10644 }
10645
10646 extern __inline __m512
10647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10649 {
10650 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10651 (__v16sf) __W,
10652 (__mmask16) __U,
10653 _MM_FROUND_CUR_DIRECTION);
10654 }
10655
10656 extern __inline __m512
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10659 {
10660 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661 (__v16sf)
10662 _mm512_setzero_ps (),
10663 (__mmask16) __U,
10664 _MM_FROUND_CUR_DIRECTION);
10665 }
10666
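/* Usage sketch (illustration only): the mask_ form writes the square roots
   only into the selected elements and keeps the old destination values
   elsewhere, while the maskz_ form zeroes the unselected elements.
   _mm512_set1_pd is defined elsewhere in this header.

     __m512d x   = _mm512_set1_pd (4.0);
     __m512d dst = _mm512_set1_pd (-1.0);
     dst = _mm512_mask_sqrt_pd (dst, (__mmask8) 0x0f, x);
     // elements 0..3 become 2.0, elements 4..7 stay -1.0
*/
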
10667 extern __inline __m512d
10668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669 _mm512_add_pd (__m512d __A, __m512d __B)
10670 {
10671 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10672 (__v8df) __B,
10673 (__v8df)
10674 _mm512_setzero_pd (),
10675 (__mmask8) -1,
10676 _MM_FROUND_CUR_DIRECTION);
10677 }
10678
10679 extern __inline __m512d
10680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10681 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10682 {
10683 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10684 (__v8df) __B,
10685 (__v8df) __W,
10686 (__mmask8) __U,
10687 _MM_FROUND_CUR_DIRECTION);
10688 }
10689
10690 extern __inline __m512d
10691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10692 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10693 {
10694 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10695 (__v8df) __B,
10696 (__v8df)
10697 _mm512_setzero_pd (),
10698 (__mmask8) __U,
10699 _MM_FROUND_CUR_DIRECTION);
10700 }
10701
10702 extern __inline __m512
10703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704 _mm512_add_ps (__m512 __A, __m512 __B)
10705 {
10706 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10707 (__v16sf) __B,
10708 (__v16sf)
10709 _mm512_setzero_ps (),
10710 (__mmask16) -1,
10711 _MM_FROUND_CUR_DIRECTION);
10712 }
10713
10714 extern __inline __m512
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10717 {
10718 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10719 (__v16sf) __B,
10720 (__v16sf) __W,
10721 (__mmask16) __U,
10722 _MM_FROUND_CUR_DIRECTION);
10723 }
10724
10725 extern __inline __m512
10726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10728 {
10729 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730 (__v16sf) __B,
10731 (__v16sf)
10732 _mm512_setzero_ps (),
10733 (__mmask16) __U,
10734 _MM_FROUND_CUR_DIRECTION);
10735 }
10736
10737 extern __inline __m512d
10738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739 _mm512_sub_pd (__m512d __A, __m512d __B)
10740 {
10741 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10742 (__v8df) __B,
10743 (__v8df)
10744 _mm512_setzero_pd (),
10745 (__mmask8) -1,
10746 _MM_FROUND_CUR_DIRECTION);
10747 }
10748
10749 extern __inline __m512d
10750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10752 {
10753 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10754 (__v8df) __B,
10755 (__v8df) __W,
10756 (__mmask8) __U,
10757 _MM_FROUND_CUR_DIRECTION);
10758 }
10759
10760 extern __inline __m512d
10761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10763 {
10764 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10765 (__v8df) __B,
10766 (__v8df)
10767 _mm512_setzero_pd (),
10768 (__mmask8) __U,
10769 _MM_FROUND_CUR_DIRECTION);
10770 }
10771
10772 extern __inline __m512
10773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10774 _mm512_sub_ps (__m512 __A, __m512 __B)
10775 {
10776 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10777 (__v16sf) __B,
10778 (__v16sf)
10779 _mm512_setzero_ps (),
10780 (__mmask16) -1,
10781 _MM_FROUND_CUR_DIRECTION);
10782 }
10783
10784 extern __inline __m512
10785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10787 {
10788 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10789 (__v16sf) __B,
10790 (__v16sf) __W,
10791 (__mmask16) __U,
10792 _MM_FROUND_CUR_DIRECTION);
10793 }
10794
10795 extern __inline __m512
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10798 {
10799 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10800 (__v16sf) __B,
10801 (__v16sf)
10802 _mm512_setzero_ps (),
10803 (__mmask16) __U,
10804 _MM_FROUND_CUR_DIRECTION);
10805 }
10806
10807 extern __inline __m512d
10808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10809 _mm512_mul_pd (__m512d __A, __m512d __B)
10810 {
10811 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10812 (__v8df) __B,
10813 (__v8df)
10814 _mm512_setzero_pd (),
10815 (__mmask8) -1,
10816 _MM_FROUND_CUR_DIRECTION);
10817 }
10818
10819 extern __inline __m512d
10820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10821 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10822 {
10823 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10824 (__v8df) __B,
10825 (__v8df) __W,
10826 (__mmask8) __U,
10827 _MM_FROUND_CUR_DIRECTION);
10828 }
10829
10830 extern __inline __m512d
10831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10832 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10833 {
10834 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10835 (__v8df) __B,
10836 (__v8df)
10837 _mm512_setzero_pd (),
10838 (__mmask8) __U,
10839 _MM_FROUND_CUR_DIRECTION);
10840 }
10841
10842 extern __inline __m512
10843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10844 _mm512_mul_ps (__m512 __A, __m512 __B)
10845 {
10846 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10847 (__v16sf) __B,
10848 (__v16sf)
10849 _mm512_setzero_ps (),
10850 (__mmask16) -1,
10851 _MM_FROUND_CUR_DIRECTION);
10852 }
10853
10854 extern __inline __m512
10855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10856 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10857 {
10858 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10859 (__v16sf) __B,
10860 (__v16sf) __W,
10861 (__mmask16) __U,
10862 _MM_FROUND_CUR_DIRECTION);
10863 }
10864
10865 extern __inline __m512
10866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10867 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10868 {
10869 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10870 (__v16sf) __B,
10871 (__v16sf)
10872 _mm512_setzero_ps (),
10873 (__mmask16) __U,
10874 _MM_FROUND_CUR_DIRECTION);
10875 }
10876
10877 extern __inline __m512d
10878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10879 _mm512_div_pd (__m512d __M, __m512d __V)
10880 {
10881 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10882 (__v8df) __V,
10883 (__v8df)
10884 _mm512_setzero_pd (),
10885 (__mmask8) -1,
10886 _MM_FROUND_CUR_DIRECTION);
10887 }
10888
10889 extern __inline __m512d
10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10892 {
10893 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10894 (__v8df) __V,
10895 (__v8df) __W,
10896 (__mmask8) __U,
10897 _MM_FROUND_CUR_DIRECTION);
10898 }
10899
10900 extern __inline __m512d
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10903 {
10904 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10905 (__v8df) __V,
10906 (__v8df)
10907 _mm512_setzero_pd (),
10908 (__mmask8) __U,
10909 _MM_FROUND_CUR_DIRECTION);
10910 }
10911
10912 extern __inline __m512
10913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10914 _mm512_div_ps (__m512 __A, __m512 __B)
10915 {
10916 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10917 (__v16sf) __B,
10918 (__v16sf)
10919 _mm512_setzero_ps (),
10920 (__mmask16) -1,
10921 _MM_FROUND_CUR_DIRECTION);
10922 }
10923
10924 extern __inline __m512
10925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10926 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10927 {
10928 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10929 (__v16sf) __B,
10930 (__v16sf) __W,
10931 (__mmask16) __U,
10932 _MM_FROUND_CUR_DIRECTION);
10933 }
10934
10935 extern __inline __m512
10936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10937 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10938 {
10939 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10940 (__v16sf) __B,
10941 (__v16sf)
10942 _mm512_setzero_ps (),
10943 (__mmask16) __U,
10944 _MM_FROUND_CUR_DIRECTION);
10945 }
10946
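/* Usage sketch (illustration only): a zero-masked divide that only computes
   the quotient in the lanes the caller asks for and returns 0.0 in the
   others.  The compare intrinsic _mm512_cmp_pd_mask and the _CMP_*
   predicates are assumed to be available from elsewhere in this header and
   from <avxintrin.h> respectively.

     __m512d num = _mm512_set1_pd (1.0);
     __m512d den = _mm512_setzero_pd ();
     __mmask8 nz = _mm512_cmp_pd_mask (den, _mm512_setzero_pd (), _CMP_NEQ_UQ);
     __m512d q   = _mm512_maskz_div_pd (nz, num, den);   // all lanes 0.0 here
*/
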
10947 extern __inline __m512d
10948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10949 _mm512_max_pd (__m512d __A, __m512d __B)
10950 {
10951 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10952 (__v8df) __B,
10953 (__v8df)
10954 _mm512_setzero_pd (),
10955 (__mmask8) -1,
10956 _MM_FROUND_CUR_DIRECTION);
10957 }
10958
10959 extern __inline __m512d
10960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10961 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10962 {
10963 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10964 (__v8df) __B,
10965 (__v8df) __W,
10966 (__mmask8) __U,
10967 _MM_FROUND_CUR_DIRECTION);
10968 }
10969
10970 extern __inline __m512d
10971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10973 {
10974 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10975 (__v8df) __B,
10976 (__v8df)
10977 _mm512_setzero_pd (),
10978 (__mmask8) __U,
10979 _MM_FROUND_CUR_DIRECTION);
10980 }
10981
10982 extern __inline __m512
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm512_max_ps (__m512 __A, __m512 __B)
10985 {
10986 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10987 (__v16sf) __B,
10988 (__v16sf)
10989 _mm512_setzero_ps (),
10990 (__mmask16) -1,
10991 _MM_FROUND_CUR_DIRECTION);
10992 }
10993
10994 extern __inline __m512
10995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10996 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10997 {
10998 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10999 (__v16sf) __B,
11000 (__v16sf) __W,
11001 (__mmask16) __U,
11002 _MM_FROUND_CUR_DIRECTION);
11003 }
11004
11005 extern __inline __m512
11006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11007 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11008 {
11009 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11010 (__v16sf) __B,
11011 (__v16sf)
11012 _mm512_setzero_ps (),
11013 (__mmask16) __U,
11014 _MM_FROUND_CUR_DIRECTION);
11015 }
11016
11017 extern __inline __m512d
11018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11019 _mm512_min_pd (__m512d __A, __m512d __B)
11020 {
11021 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11022 (__v8df) __B,
11023 (__v8df)
11024 _mm512_setzero_pd (),
11025 (__mmask8) -1,
11026 _MM_FROUND_CUR_DIRECTION);
11027 }
11028
11029 extern __inline __m512d
11030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11031 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11032 {
11033 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11034 (__v8df) __B,
11035 (__v8df) __W,
11036 (__mmask8) __U,
11037 _MM_FROUND_CUR_DIRECTION);
11038 }
11039
11040 extern __inline __m512d
11041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11042 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11043 {
11044 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11045 (__v8df) __B,
11046 (__v8df)
11047 _mm512_setzero_pd (),
11048 (__mmask8) __U,
11049 _MM_FROUND_CUR_DIRECTION);
11050 }
11051
11052 extern __inline __m512
11053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11054 _mm512_min_ps (__m512 __A, __m512 __B)
11055 {
11056 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11057 (__v16sf) __B,
11058 (__v16sf)
11059 _mm512_setzero_ps (),
11060 (__mmask16) -1,
11061 _MM_FROUND_CUR_DIRECTION);
11062 }
11063
11064 extern __inline __m512
11065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11066 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11067 {
11068 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11069 (__v16sf) __B,
11070 (__v16sf) __W,
11071 (__mmask16) __U,
11072 _MM_FROUND_CUR_DIRECTION);
11073 }
11074
11075 extern __inline __m512
11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11078 {
11079 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11080 (__v16sf) __B,
11081 (__v16sf)
11082 _mm512_setzero_ps (),
11083 (__mmask16) __U,
11084 _MM_FROUND_CUR_DIRECTION);
11085 }
11086
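/* Usage sketch (illustration only): like the legacy SSE/AVX encodings,
   VMAXPD/VMINPD return the second source operand when either input is a
   NaN, so the argument order decides whether NaNs propagate.

     __m512d x = _mm512_set1_pd (__builtin_nan (""));
     __m512d y = _mm512_set1_pd (1.0);
     __m512d r = _mm512_max_pd (x, y);   // every element is 1.0, the second operand
*/
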
11087 extern __inline __m512d
11088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11089 _mm512_scalef_pd (__m512d __A, __m512d __B)
11090 {
11091 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11092 (__v8df) __B,
11093 (__v8df)
11094 _mm512_setzero_pd (),
11095 (__mmask8) -1,
11096 _MM_FROUND_CUR_DIRECTION);
11097 }
11098
11099 extern __inline __m512d
11100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11101 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11102 {
11103 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11104 (__v8df) __B,
11105 (__v8df) __W,
11106 (__mmask8) __U,
11107 _MM_FROUND_CUR_DIRECTION);
11108 }
11109
11110 extern __inline __m512d
11111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11112 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11113 {
11114 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11115 (__v8df) __B,
11116 (__v8df)
11117 _mm512_setzero_pd (),
11118 (__mmask8) __U,
11119 _MM_FROUND_CUR_DIRECTION);
11120 }
11121
11122 extern __inline __m512
11123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11124 _mm512_scalef_ps (__m512 __A, __m512 __B)
11125 {
11126 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11127 (__v16sf) __B,
11128 (__v16sf)
11129 _mm512_setzero_ps (),
11130 (__mmask16) -1,
11131 _MM_FROUND_CUR_DIRECTION);
11132 }
11133
11134 extern __inline __m512
11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11137 {
11138 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11139 (__v16sf) __B,
11140 (__v16sf) __W,
11141 (__mmask16) __U,
11142 _MM_FROUND_CUR_DIRECTION);
11143 }
11144
11145 extern __inline __m512
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11148 {
11149 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11150 (__v16sf) __B,
11151 (__v16sf)
11152 _mm512_setzero_ps (),
11153 (__mmask16) __U,
11154 _MM_FROUND_CUR_DIRECTION);
11155 }
11156
11157 extern __inline __m128d
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm_scalef_sd (__m128d __A, __m128d __B)
11160 {
11161 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11162 (__v2df) __B,
11163 _MM_FROUND_CUR_DIRECTION);
11164 }
11165
11166 extern __inline __m128
11167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11168 _mm_scalef_ss (__m128 __A, __m128 __B)
11169 {
11170 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11171 (__v4sf) __B,
11172 _MM_FROUND_CUR_DIRECTION);
11173 }
11174
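/* Usage sketch (illustration only): VSCALEFPD computes __A * 2^floor(__B)
   per element, i.e. a vector ldexp.  _mm512_set1_pd is defined elsewhere
   in this header.

     __m512d x = _mm512_set1_pd (3.0);
     __m512d e = _mm512_set1_pd (4.0);
     __m512d r = _mm512_scalef_pd (x, e);   // every element: 3.0 * 2^4 = 48.0
*/
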
11175 extern __inline __m512d
11176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11177 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11178 {
11179 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11180 (__v8df) __B,
11181 (__v8df) __C,
11182 (__mmask8) -1,
11183 _MM_FROUND_CUR_DIRECTION);
11184 }
11185
11186 extern __inline __m512d
11187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11188 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11189 {
11190 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11191 (__v8df) __B,
11192 (__v8df) __C,
11193 (__mmask8) __U,
11194 _MM_FROUND_CUR_DIRECTION);
11195 }
11196
11197 extern __inline __m512d
11198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11200 {
11201 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11202 (__v8df) __B,
11203 (__v8df) __C,
11204 (__mmask8) __U,
11205 _MM_FROUND_CUR_DIRECTION);
11206 }
11207
11208 extern __inline __m512d
11209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11210 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11211 {
11212 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11213 (__v8df) __B,
11214 (__v8df) __C,
11215 (__mmask8) __U,
11216 _MM_FROUND_CUR_DIRECTION);
11217 }
11218
11219 extern __inline __m512
11220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11221 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11222 {
11223 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11224 (__v16sf) __B,
11225 (__v16sf) __C,
11226 (__mmask16) -1,
11227 _MM_FROUND_CUR_DIRECTION);
11228 }
11229
11230 extern __inline __m512
11231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11232 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11233 {
11234 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11235 (__v16sf) __B,
11236 (__v16sf) __C,
11237 (__mmask16) __U,
11238 _MM_FROUND_CUR_DIRECTION);
11239 }
11240
11241 extern __inline __m512
11242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11243 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11244 {
11245 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11246 (__v16sf) __B,
11247 (__v16sf) __C,
11248 (__mmask16) __U,
11249 _MM_FROUND_CUR_DIRECTION);
11250 }
11251
11252 extern __inline __m512
11253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11254 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11255 {
11256 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11257 (__v16sf) __B,
11258 (__v16sf) __C,
11259 (__mmask16) __U,
11260 _MM_FROUND_CUR_DIRECTION);
11261 }
11262
11263 extern __inline __m512d
11264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11265 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11266 {
11267 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11268 (__v8df) __B,
11269 -(__v8df) __C,
11270 (__mmask8) -1,
11271 _MM_FROUND_CUR_DIRECTION);
11272 }
11273
11274 extern __inline __m512d
11275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11276 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11277 {
11278 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11279 (__v8df) __B,
11280 -(__v8df) __C,
11281 (__mmask8) __U,
11282 _MM_FROUND_CUR_DIRECTION);
11283 }
11284
11285 extern __inline __m512d
11286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11287 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11288 {
11289 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11290 (__v8df) __B,
11291 (__v8df) __C,
11292 (__mmask8) __U,
11293 _MM_FROUND_CUR_DIRECTION);
11294 }
11295
11296 extern __inline __m512d
11297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11298 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11299 {
11300 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11301 (__v8df) __B,
11302 -(__v8df) __C,
11303 (__mmask8) __U,
11304 _MM_FROUND_CUR_DIRECTION);
11305 }
11306
11307 extern __inline __m512
11308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11309 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11310 {
11311 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11312 (__v16sf) __B,
11313 -(__v16sf) __C,
11314 (__mmask16) -1,
11315 _MM_FROUND_CUR_DIRECTION);
11316 }
11317
11318 extern __inline __m512
11319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11320 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11321 {
11322 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11323 (__v16sf) __B,
11324 -(__v16sf) __C,
11325 (__mmask16) __U,
11326 _MM_FROUND_CUR_DIRECTION);
11327 }
11328
11329 extern __inline __m512
11330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11331 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11332 {
11333 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11334 (__v16sf) __B,
11335 (__v16sf) __C,
11336 (__mmask16) __U,
11337 _MM_FROUND_CUR_DIRECTION);
11338 }
11339
11340 extern __inline __m512
11341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11342 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11343 {
11344 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11345 (__v16sf) __B,
11346 -(__v16sf) __C,
11347 (__mmask16) __U,
11348 _MM_FROUND_CUR_DIRECTION);
11349 }
11350
11351 extern __inline __m512d
11352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11353 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11354 {
11355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11356 (__v8df) __B,
11357 (__v8df) __C,
11358 (__mmask8) -1,
11359 _MM_FROUND_CUR_DIRECTION);
11360 }
11361
11362 extern __inline __m512d
11363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11364 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11365 {
11366 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11367 (__v8df) __B,
11368 (__v8df) __C,
11369 (__mmask8) __U,
11370 _MM_FROUND_CUR_DIRECTION);
11371 }
11372
11373 extern __inline __m512d
11374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11375 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11376 {
11377 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11378 (__v8df) __B,
11379 (__v8df) __C,
11380 (__mmask8) __U,
11381 _MM_FROUND_CUR_DIRECTION);
11382 }
11383
11384 extern __inline __m512d
11385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11386 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11387 {
11388 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11389 (__v8df) __B,
11390 (__v8df) __C,
11391 (__mmask8) __U,
11392 _MM_FROUND_CUR_DIRECTION);
11393 }
11394
11395 extern __inline __m512
11396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11398 {
11399 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11400 (__v16sf) __B,
11401 (__v16sf) __C,
11402 (__mmask16) -1,
11403 _MM_FROUND_CUR_DIRECTION);
11404 }
11405
11406 extern __inline __m512
11407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11408 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11409 {
11410 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11411 (__v16sf) __B,
11412 (__v16sf) __C,
11413 (__mmask16) __U,
11414 _MM_FROUND_CUR_DIRECTION);
11415 }
11416
11417 extern __inline __m512
11418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11419 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11420 {
11421 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11422 (__v16sf) __B,
11423 (__v16sf) __C,
11424 (__mmask16) __U,
11425 _MM_FROUND_CUR_DIRECTION);
11426 }
11427
11428 extern __inline __m512
11429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11430 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11431 {
11432 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11433 (__v16sf) __B,
11434 (__v16sf) __C,
11435 (__mmask16) __U,
11436 _MM_FROUND_CUR_DIRECTION);
11437 }
11438
11439 extern __inline __m512d
11440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11441 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11442 {
11443 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11444 (__v8df) __B,
11445 -(__v8df) __C,
11446 (__mmask8) -1,
11447 _MM_FROUND_CUR_DIRECTION);
11448 }
11449
11450 extern __inline __m512d
11451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11453 {
11454 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11455 (__v8df) __B,
11456 -(__v8df) __C,
11457 (__mmask8) __U,
11458 _MM_FROUND_CUR_DIRECTION);
11459 }
11460
11461 extern __inline __m512d
11462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11463 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11464 {
11465 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11466 (__v8df) __B,
11467 (__v8df) __C,
11468 (__mmask8) __U,
11469 _MM_FROUND_CUR_DIRECTION);
11470 }
11471
11472 extern __inline __m512d
11473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11474 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11475 {
11476 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11477 (__v8df) __B,
11478 -(__v8df) __C,
11479 (__mmask8) __U,
11480 _MM_FROUND_CUR_DIRECTION);
11481 }
11482
11483 extern __inline __m512
11484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11485 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11486 {
11487 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11488 (__v16sf) __B,
11489 -(__v16sf) __C,
11490 (__mmask16) -1,
11491 _MM_FROUND_CUR_DIRECTION);
11492 }
11493
11494 extern __inline __m512
11495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11496 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11497 {
11498 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11499 (__v16sf) __B,
11500 -(__v16sf) __C,
11501 (__mmask16) __U,
11502 _MM_FROUND_CUR_DIRECTION);
11503 }
11504
11505 extern __inline __m512
11506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11507 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11508 {
11509 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11510 (__v16sf) __B,
11511 (__v16sf) __C,
11512 (__mmask16) __U,
11513 _MM_FROUND_CUR_DIRECTION);
11514 }
11515
11516 extern __inline __m512
11517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11518 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11519 {
11520 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11521 (__v16sf) __B,
11522 -(__v16sf) __C,
11523 (__mmask16) __U,
11524 _MM_FROUND_CUR_DIRECTION);
11525 }
11526
11527 extern __inline __m512d
11528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11529 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11530 {
11531 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11532 (__v8df) __B,
11533 (__v8df) __C,
11534 (__mmask8) -1,
11535 _MM_FROUND_CUR_DIRECTION);
11536 }
11537
11538 extern __inline __m512d
11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11541 {
11542 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11543 (__v8df) __B,
11544 (__v8df) __C,
11545 (__mmask8) __U,
11546 _MM_FROUND_CUR_DIRECTION);
11547 }
11548
11549 extern __inline __m512d
11550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11551 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11552 {
11553 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11554 (__v8df) __B,
11555 (__v8df) __C,
11556 (__mmask8) __U,
11557 _MM_FROUND_CUR_DIRECTION);
11558 }
11559
11560 extern __inline __m512d
11561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11563 {
11564 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11565 (__v8df) __B,
11566 (__v8df) __C,
11567 (__mmask8) __U,
11568 _MM_FROUND_CUR_DIRECTION);
11569 }
11570
11571 extern __inline __m512
11572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11573 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11574 {
11575 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11576 (__v16sf) __B,
11577 (__v16sf) __C,
11578 (__mmask16) -1,
11579 _MM_FROUND_CUR_DIRECTION);
11580 }
11581
11582 extern __inline __m512
11583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11584 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11585 {
11586 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11587 (__v16sf) __B,
11588 (__v16sf) __C,
11589 (__mmask16) __U,
11590 _MM_FROUND_CUR_DIRECTION);
11591 }
11592
11593 extern __inline __m512
11594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11595 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11596 {
11597 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11598 (__v16sf) __B,
11599 (__v16sf) __C,
11600 (__mmask16) __U,
11601 _MM_FROUND_CUR_DIRECTION);
11602 }
11603
11604 extern __inline __m512
11605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11606 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11607 {
11608 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11609 (__v16sf) __B,
11610 (__v16sf) __C,
11611 (__mmask16) __U,
11612 _MM_FROUND_CUR_DIRECTION);
11613 }
11614
11615 extern __inline __m512d
11616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11617 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11618 {
11619 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11620 (__v8df) __B,
11621 -(__v8df) __C,
11622 (__mmask8) -1,
11623 _MM_FROUND_CUR_DIRECTION);
11624 }
11625
11626 extern __inline __m512d
11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11628 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11629 {
11630 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11631 (__v8df) __B,
11632 (__v8df) __C,
11633 (__mmask8) __U,
11634 _MM_FROUND_CUR_DIRECTION);
11635 }
11636
11637 extern __inline __m512d
11638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11639 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11640 {
11641 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11642 (__v8df) __B,
11643 (__v8df) __C,
11644 (__mmask8) __U,
11645 _MM_FROUND_CUR_DIRECTION);
11646 }
11647
11648 extern __inline __m512d
11649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11650 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11651 {
11652 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11653 (__v8df) __B,
11654 -(__v8df) __C,
11655 (__mmask8) __U,
11656 _MM_FROUND_CUR_DIRECTION);
11657 }
11658
11659 extern __inline __m512
11660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11661 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11662 {
11663 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11664 (__v16sf) __B,
11665 -(__v16sf) __C,
11666 (__mmask16) -1,
11667 _MM_FROUND_CUR_DIRECTION);
11668 }
11669
11670 extern __inline __m512
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11673 {
11674 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11675 (__v16sf) __B,
11676 (__v16sf) __C,
11677 (__mmask16) __U,
11678 _MM_FROUND_CUR_DIRECTION);
11679 }
11680
11681 extern __inline __m512
11682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11683 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11684 {
11685 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11686 (__v16sf) __B,
11687 (__v16sf) __C,
11688 (__mmask16) __U,
11689 _MM_FROUND_CUR_DIRECTION);
11690 }
11691
11692 extern __inline __m512
11693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11694 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11695 {
11696 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11697 (__v16sf) __B,
11698 -(__v16sf) __C,
11699 (__mmask16) __U,
11700 _MM_FROUND_CUR_DIRECTION);
11701 }
11702
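/* Usage sketch (illustration only): the fused families above differ only in
   the signs applied before the single rounding: fmadd is a*b+c, fmsub is
   a*b-c, fnmadd is -(a*b)+c, fnmsub is -(a*b)-c, and the fmaddsub/fmsubadd
   forms alternate between the two per element.

     __m512 a = _mm512_set1_ps (2.0f);
     __m512 b = _mm512_set1_ps (3.0f);
     __m512 c = _mm512_set1_ps (1.0f);
     __m512 r = _mm512_fnmadd_ps (a, b, c);   // every element: -(2*3) + 1 = -5
*/
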
11703 extern __inline __m256i
11704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11705 _mm512_cvttpd_epi32 (__m512d __A)
11706 {
11707 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11708 (__v8si)
11709 _mm256_setzero_si256 (),
11710 (__mmask8) -1,
11711 _MM_FROUND_CUR_DIRECTION);
11712 }
11713
11714 extern __inline __m256i
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11717 {
11718 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11719 (__v8si) __W,
11720 (__mmask8) __U,
11721 _MM_FROUND_CUR_DIRECTION);
11722 }
11723
11724 extern __inline __m256i
11725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11726 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11727 {
11728 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11729 (__v8si)
11730 _mm256_setzero_si256 (),
11731 (__mmask8) __U,
11732 _MM_FROUND_CUR_DIRECTION);
11733 }
11734
11735 extern __inline __m256i
11736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737 _mm512_cvttpd_epu32 (__m512d __A)
11738 {
11739 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11740 (__v8si)
11741 _mm256_setzero_si256 (),
11742 (__mmask8) -1,
11743 _MM_FROUND_CUR_DIRECTION);
11744 }
11745
11746 extern __inline __m256i
11747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11748 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11749 {
11750 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11751 (__v8si) __W,
11752 (__mmask8) __U,
11753 _MM_FROUND_CUR_DIRECTION);
11754 }
11755
11756 extern __inline __m256i
11757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11758 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11759 {
11760 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11761 (__v8si)
11762 _mm256_setzero_si256 (),
11763 (__mmask8) __U,
11764 _MM_FROUND_CUR_DIRECTION);
11765 }
11766
11767 extern __inline __m256i
11768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11769 _mm512_cvtpd_epi32 (__m512d __A)
11770 {
11771 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11772 (__v8si)
11773 _mm256_setzero_si256 (),
11774 (__mmask8) -1,
11775 _MM_FROUND_CUR_DIRECTION);
11776 }
11777
11778 extern __inline __m256i
11779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11780 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11781 {
11782 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11783 (__v8si) __W,
11784 (__mmask8) __U,
11785 _MM_FROUND_CUR_DIRECTION);
11786 }
11787
11788 extern __inline __m256i
11789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11790 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11791 {
11792 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11793 (__v8si)
11794 _mm256_setzero_si256 (),
11795 (__mmask8) __U,
11796 _MM_FROUND_CUR_DIRECTION);
11797 }
11798
11799 extern __inline __m256i
11800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11801 _mm512_cvtpd_epu32 (__m512d __A)
11802 {
11803 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11804 (__v8si)
11805 _mm256_setzero_si256 (),
11806 (__mmask8) -1,
11807 _MM_FROUND_CUR_DIRECTION);
11808 }
11809
11810 extern __inline __m256i
11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11813 {
11814 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11815 (__v8si) __W,
11816 (__mmask8) __U,
11817 _MM_FROUND_CUR_DIRECTION);
11818 }
11819
11820 extern __inline __m256i
11821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11822 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11823 {
11824 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11825 (__v8si)
11826 _mm256_setzero_si256 (),
11827 (__mmask8) __U,
11828 _MM_FROUND_CUR_DIRECTION);
11829 }
11830
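/* Usage sketch (illustration only): the cvtt* conversions truncate toward
   zero, while the cvt* conversions honour the current (or explicitly
   supplied) rounding mode; eight doubles narrow to a 256-bit vector of
   32-bit integers.  _mm512_set1_pd is defined elsewhere in this header.

     __m512d x = _mm512_set1_pd (2.7);
     __m256i t = _mm512_cvttpd_epi32 (x);   // every element: 2
     __m256i n = _mm512_cvtpd_epi32 (x);    // every element: 3 under round-to-nearest
*/
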
11831 extern __inline __m512i
11832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11833 _mm512_cvttps_epi32 (__m512 __A)
11834 {
11835 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11836 (__v16si)
11837 _mm512_setzero_si512 (),
11838 (__mmask16) -1,
11839 _MM_FROUND_CUR_DIRECTION);
11840 }
11841
11842 extern __inline __m512i
11843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11844 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11845 {
11846 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11847 (__v16si) __W,
11848 (__mmask16) __U,
11849 _MM_FROUND_CUR_DIRECTION);
11850 }
11851
11852 extern __inline __m512i
11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11854 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11855 {
11856 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11857 (__v16si)
11858 _mm512_setzero_si512 (),
11859 (__mmask16) __U,
11860 _MM_FROUND_CUR_DIRECTION);
11861 }
11862
11863 extern __inline __m512i
11864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11865 _mm512_cvttps_epu32 (__m512 __A)
11866 {
11867 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11868 (__v16si)
11869 _mm512_setzero_si512 (),
11870 (__mmask16) -1,
11871 _MM_FROUND_CUR_DIRECTION);
11872 }
11873
11874 extern __inline __m512i
11875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11876 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11877 {
11878 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11879 (__v16si) __W,
11880 (__mmask16) __U,
11881 _MM_FROUND_CUR_DIRECTION);
11882 }
11883
11884 extern __inline __m512i
11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11887 {
11888 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11889 (__v16si)
11890 _mm512_setzero_si512 (),
11891 (__mmask16) __U,
11892 _MM_FROUND_CUR_DIRECTION);
11893 }
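
/* Illustrative sketch; the helper name is hypothetical.  The single-precision
   forms operate on sixteen lanes, so the write mask is an __mmask16 with one
   bit per float element.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_trunc_ps_to_epu32 (__m512i __old, __mmask16 __k, __m512 __src)
{
  /* Truncate toward zero in the selected lanes, merge __old elsewhere.  */
  return _mm512_mask_cvttps_epu32 (__old, __k, __src);
}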
11894
11895 extern __inline __m512i
11896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11897 _mm512_cvtps_epi32 (__m512 __A)
11898 {
11899 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11900 (__v16si)
11901 _mm512_setzero_si512 (),
11902 (__mmask16) -1,
11903 _MM_FROUND_CUR_DIRECTION);
11904 }
11905
11906 extern __inline __m512i
11907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11909 {
11910 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11911 (__v16si) __W,
11912 (__mmask16) __U,
11913 _MM_FROUND_CUR_DIRECTION);
11914 }
11915
11916 extern __inline __m512i
11917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11919 {
11920 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11921 (__v16si)
11922 _mm512_setzero_si512 (),
11923 (__mmask16) __U,
11924 _MM_FROUND_CUR_DIRECTION);
11925 }
11926
11927 extern __inline __m512i
11928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11929 _mm512_cvtps_epu32 (__m512 __A)
11930 {
11931 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11932 (__v16si)
11933 _mm512_setzero_si512 (),
11934 (__mmask16) -1,
11935 _MM_FROUND_CUR_DIRECTION);
11936 }
11937
11938 extern __inline __m512i
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11941 {
11942 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11943 (__v16si) __W,
11944 (__mmask16) __U,
11945 _MM_FROUND_CUR_DIRECTION);
11946 }
11947
11948 extern __inline __m512i
11949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11950 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11951 {
11952 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11953 (__v16si)
11954 _mm512_setzero_si512 (),
11955 (__mmask16) __U,
11956 _MM_FROUND_CUR_DIRECTION);
11957 }
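
/* Illustrative sketch; the helper name is hypothetical.  Unlike the
   _mm512_cvtt* forms above, _mm512_cvtps_epi32 rounds using the rounding
   mode currently set in MXCSR, which defaults to round-to-nearest-even.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_round_ps_to_epi32 (__m512 __src)
{
  /* With the default MXCSR setting, 2.5f converts to 2 and 3.5f to 4.  */
  return _mm512_cvtps_epi32 (__src);
}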
11958
11959 #ifdef __x86_64__
11960 extern __inline __m128
11961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11962 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11963 {
11964 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11965 _MM_FROUND_CUR_DIRECTION);
11966 }
11967
11968 extern __inline __m128d
11969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11970 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11971 {
11972 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11973 _MM_FROUND_CUR_DIRECTION);
11974 }
11975 #endif
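
#ifdef __x86_64__
/* Illustrative sketch; the helper name is hypothetical.  The 64-bit
   unsigned-to-scalar conversions are only available in 64-bit mode and
   replace just the lowest element of the destination vector.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_u64_to_low_ss (__m128 __upper, unsigned long long __x)
{
  /* Element 0 becomes (float) __x; elements 1..3 are copied from __upper.  */
  return _mm_cvtu64_ss (__upper, __x);
}
#endif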
11976
11977 extern __inline __m128
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11980 {
11981 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11982 _MM_FROUND_CUR_DIRECTION);
11983 }
11984
11985 extern __inline __m512
11986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11987 _mm512_cvtepi32_ps (__m512i __A)
11988 {
11989 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11990 (__v16sf)
11991 _mm512_setzero_ps (),
11992 (__mmask16) -1,
11993 _MM_FROUND_CUR_DIRECTION);
11994 }
11995
11996 extern __inline __m512
11997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11998 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11999 {
12000 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12001 (__v16sf) __W,
12002 (__mmask16) __U,
12003 _MM_FROUND_CUR_DIRECTION);
12004 }
12005
12006 extern __inline __m512
12007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12009 {
12010 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12011 (__v16sf)
12012 _mm512_setzero_ps (),
12013 (__mmask16) __U,
12014 _MM_FROUND_CUR_DIRECTION);
12015 }
12016
12017 extern __inline __m512
12018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12019 _mm512_cvtepu32_ps (__m512i __A)
12020 {
12021 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12022 (__v16sf)
12023 _mm512_setzero_ps (),
12024 (__mmask16) -1,
12025 _MM_FROUND_CUR_DIRECTION);
12026 }
12027
12028 extern __inline __m512
12029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12030 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12031 {
12032 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12033 (__v16sf) __W,
12034 (__mmask16) __U,
12035 _MM_FROUND_CUR_DIRECTION);
12036 }
12037
12038 extern __inline __m512
12039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12040 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12041 {
12042 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12043 (__v16sf)
12044 _mm512_setzero_ps (),
12045 (__mmask16) __U,
12046 _MM_FROUND_CUR_DIRECTION);
12047 }
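
/* Illustrative sketch; the helper name is hypothetical.  The epi32 and epu32
   variants interpret the same 32-bit lanes as signed or unsigned integers,
   which matters once the top bit of a lane is set.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_unsigned_lanes_to_ps (__m512i __src)
{
  /* A lane holding 0x80000000 yields 2147483648.0f here, whereas
     _mm512_cvtepi32_ps would yield -2147483648.0f for the same bits.  */
  return _mm512_cvtepu32_ps (__src);
}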
12048
12049 #ifdef __OPTIMIZE__
12050 extern __inline __m512d
12051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12052 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12053 {
12054 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12055 (__v8df) __B,
12056 (__v8di) __C,
12057 __imm,
12058 (__mmask8) -1,
12059 _MM_FROUND_CUR_DIRECTION);
12060 }
12061
12062 extern __inline __m512d
12063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12065 __m512i __C, const int __imm)
12066 {
12067 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12068 (__v8df) __B,
12069 (__v8di) __C,
12070 __imm,
12071 (__mmask8) __U,
12072 _MM_FROUND_CUR_DIRECTION);
12073 }
12074
12075 extern __inline __m512d
12076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12077 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12078 __m512i __C, const int __imm)
12079 {
12080 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12081 (__v8df) __B,
12082 (__v8di) __C,
12083 __imm,
12084 (__mmask8) __U,
12085 _MM_FROUND_CUR_DIRECTION);
12086 }
12087
12088 extern __inline __m512
12089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12090 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12091 {
12092 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12093 (__v16sf) __B,
12094 (__v16si) __C,
12095 __imm,
12096 (__mmask16) -1,
12097 _MM_FROUND_CUR_DIRECTION);
12098 }
12099
12100 extern __inline __m512
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12103 __m512i __C, const int __imm)
12104 {
12105 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12106 (__v16sf) __B,
12107 (__v16si) __C,
12108 __imm,
12109 (__mmask16) __U,
12110 _MM_FROUND_CUR_DIRECTION);
12111 }
12112
12113 extern __inline __m512
12114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12115 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12116 __m512i __C, const int __imm)
12117 {
12118 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12119 (__v16sf) __B,
12120 (__v16si) __C,
12121 __imm,
12122 (__mmask16) __U,
12123 _MM_FROUND_CUR_DIRECTION);
12124 }
12125
12126 extern __inline __m128d
12127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12128 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12129 {
12130 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12131 (__v2df) __B,
12132 (__v2di) __C, __imm,
12133 (__mmask8) -1,
12134 _MM_FROUND_CUR_DIRECTION);
12135 }
12136
12137 extern __inline __m128d
12138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12139 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12140 __m128i __C, const int __imm)
12141 {
12142 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12143 (__v2df) __B,
12144 (__v2di) __C, __imm,
12145 (__mmask8) __U,
12146 _MM_FROUND_CUR_DIRECTION);
12147 }
12148
12149 extern __inline __m128d
12150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12151 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12152 __m128i __C, const int __imm)
12153 {
12154 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12155 (__v2df) __B,
12156 (__v2di) __C,
12157 __imm,
12158 (__mmask8) __U,
12159 _MM_FROUND_CUR_DIRECTION);
12160 }
12161
12162 extern __inline __m128
12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12165 {
12166 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12167 (__v4sf) __B,
12168 (__v4si) __C, __imm,
12169 (__mmask8) -1,
12170 _MM_FROUND_CUR_DIRECTION);
12171 }
12172
12173 extern __inline __m128
12174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12175 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12176 __m128i __C, const int __imm)
12177 {
12178 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12179 (__v4sf) __B,
12180 (__v4si) __C, __imm,
12181 (__mmask8) __U,
12182 _MM_FROUND_CUR_DIRECTION);
12183 }
12184
12185 extern __inline __m128
12186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12188 __m128i __C, const int __imm)
12189 {
12190 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12191 (__v4sf) __B,
12192 (__v4si) __C, __imm,
12193 (__mmask8) __U,
12194 _MM_FROUND_CUR_DIRECTION);
12195 }
12196 #else
12197 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12198 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12199 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12200 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12201
12202 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12203 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12204 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12205 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12206
12207 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12208 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12209 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12210 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12211
12212 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12213 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12214 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12215 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12216
12217 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12218 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12219 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12220 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12221
12222 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12223 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12224 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12225 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12226
12227 #define _mm_fixupimm_sd(X, Y, Z, C) \
12228 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12229 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12230 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12231
12232 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12233 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12234 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12235 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12236
12237 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12238 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12239 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12240 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12241
12242 #define _mm_fixupimm_ss(X, Y, Z, C) \
12243 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12244 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12245 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12246
12247 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12248 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12249 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12250 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12251
12252 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12253 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12254 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12255 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12256 #endif
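
/* Illustrative sketch; the helper name and the all-zero table are chosen for
   this example only.  The __m512i operand is a per-lane table of 4-bit
   response codes and the immediate selects additional fault behaviour, both
   encoded as documented for VFIXUPIMMPD; with an all-zero table every
   special-value token is expected to leave the destination lane unchanged.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_fixupimm_pd_noop (__m512d __dst, __m512d __src)
{
  /* Call shape only: destination, classified source, response table, imm.  */
  return _mm512_fixupimm_pd (__dst, __src, _mm512_setzero_si512 (), 0);
}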
12257
12258 #ifdef __x86_64__
12259 extern __inline unsigned long long
12260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12261 _mm_cvtss_u64 (__m128 __A)
12262 {
12263 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12264 __A,
12265 _MM_FROUND_CUR_DIRECTION);
12266 }
12267
12268 extern __inline unsigned long long
12269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12270 _mm_cvttss_u64 (__m128 __A)
12271 {
12272 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12273 __A,
12274 _MM_FROUND_CUR_DIRECTION);
12275 }
12276
12277 extern __inline long long
12278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12279 _mm_cvttss_i64 (__m128 __A)
12280 {
12281 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12282 _MM_FROUND_CUR_DIRECTION);
12283 }
12284 #endif /* __x86_64__ */
12285
12286 extern __inline unsigned
12287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12288 _mm_cvtss_u32 (__m128 __A)
12289 {
12290 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12291 _MM_FROUND_CUR_DIRECTION);
12292 }
12293
12294 extern __inline unsigned
12295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12296 _mm_cvttss_u32 (__m128 __A)
12297 {
12298 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12299 _MM_FROUND_CUR_DIRECTION);
12300 }
12301
12302 extern __inline int
12303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12304 _mm_cvttss_i32 (__m128 __A)
12305 {
12306 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12307 _MM_FROUND_CUR_DIRECTION);
12308 }
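
/* Illustrative sketch; the helper name is hypothetical.  _mm_cvtss_u32 rounds
   the low float according to MXCSR, while _mm_cvttss_u32 always truncates
   toward zero.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_ss_to_u32_truncated (__m128 __a)
{
  /* For a low element of 2.9f this returns 2; _mm_cvtss_u32 would return 3
     under the default rounding mode.  */
  return _mm_cvttss_u32 (__a);
}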
12309
12310 #ifdef __x86_64__
12311 extern __inline unsigned long long
12312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12313 _mm_cvtsd_u64 (__m128d __A)
12314 {
12315 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12316 __A,
12317 _MM_FROUND_CUR_DIRECTION);
12318 }
12319
12320 extern __inline unsigned long long
12321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322 _mm_cvttsd_u64 (__m128d __A)
12323 {
12324 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12325 __A,
12326 _MM_FROUND_CUR_DIRECTION);
12327 }
12328
12329 extern __inline long long
12330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331 _mm_cvttsd_i64 (__m128d __A)
12332 {
12333 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12334 _MM_FROUND_CUR_DIRECTION);
12335 }
12336 #endif /* __x86_64__ */
12337
12338 extern __inline unsigned
12339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340 _mm_cvtsd_u32 (__m128d __A)
12341 {
12342 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12343 _MM_FROUND_CUR_DIRECTION);
12344 }
12345
12346 extern __inline unsigned
12347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12348 _mm_cvttsd_u32 (__m128d __A)
12349 {
12350 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12351 _MM_FROUND_CUR_DIRECTION);
12352 }
12353
12354 extern __inline int
12355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12356 _mm_cvttsd_i32 (__m128d __A)
12357 {
12358 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12359 _MM_FROUND_CUR_DIRECTION);
12360 }
12361
12362 extern __inline __m512d
12363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12364 _mm512_cvtps_pd (__m256 __A)
12365 {
12366 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12367 (__v8df)
12368 _mm512_setzero_pd (),
12369 (__mmask8) -1,
12370 _MM_FROUND_CUR_DIRECTION);
12371 }
12372
12373 extern __inline __m512d
12374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12375 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12376 {
12377 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12378 (__v8df) __W,
12379 (__mmask8) __U,
12380 _MM_FROUND_CUR_DIRECTION);
12381 }
12382
12383 extern __inline __m512d
12384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12385 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12386 {
12387 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12388 (__v8df)
12389 _mm512_setzero_pd (),
12390 (__mmask8) __U,
12391 _MM_FROUND_CUR_DIRECTION);
12392 }
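
/* Illustrative sketch; the helper name is hypothetical.  Widening eight
   floats to eight doubles is exact, so masking here only controls which
   destination lanes are written.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_widen_ps_to_pd (__m512d __old, __mmask8 __k, __m256 __src)
{
  /* Selected lanes get the widened value; the rest keep __old.  */
  return _mm512_mask_cvtps_pd (__old, __k, __src);
}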
12393
12394 extern __inline __m512
12395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396 _mm512_cvtph_ps (__m256i __A)
12397 {
12398 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12399 (__v16sf)
12400 _mm512_setzero_ps (),
12401 (__mmask16) -1,
12402 _MM_FROUND_CUR_DIRECTION);
12403 }
12404
12405 extern __inline __m512
12406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12407 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12408 {
12409 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12410 (__v16sf) __W,
12411 (__mmask16) __U,
12412 _MM_FROUND_CUR_DIRECTION);
12413 }
12414
12415 extern __inline __m512
12416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12417 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12418 {
12419 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12420 (__v16sf)
12421 _mm512_setzero_ps (),
12422 (__mmask16) __U,
12423 _MM_FROUND_CUR_DIRECTION);
12424 }
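
/* Illustrative sketch; the helper name is hypothetical.  The source is a
   __m256i holding sixteen IEEE half-precision values, one per 16-bit lane;
   the result widens each of them to single precision.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_half_to_ps (__m256i __half)
{
  /* Half-to-float widening is exact for every representable half value.  */
  return _mm512_cvtph_ps (__half);
}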
12425
12426 extern __inline __m256
12427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12428 _mm512_cvtpd_ps (__m512d __A)
12429 {
12430 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12431 (__v8sf)
12432 _mm256_setzero_ps (),
12433 (__mmask8) -1,
12434 _MM_FROUND_CUR_DIRECTION);
12435 }
12436
12437 extern __inline __m256
12438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12439 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12440 {
12441 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12442 (__v8sf) __W,
12443 (__mmask8) __U,
12444 _MM_FROUND_CUR_DIRECTION);
12445 }
12446
12447 extern __inline __m256
12448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12449 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12450 {
12451 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12452 (__v8sf)
12453 _mm256_setzero_ps (),
12454 (__mmask8) __U,
12455 _MM_FROUND_CUR_DIRECTION);
12456 }
12457
12458 #ifdef __OPTIMIZE__
12459 extern __inline __m512
12460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12461 _mm512_getexp_ps (__m512 __A)
12462 {
12463 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12464 (__v16sf)
12465 _mm512_setzero_ps (),
12466 (__mmask16) -1,
12467 _MM_FROUND_CUR_DIRECTION);
12468 }
12469
12470 extern __inline __m512
12471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12472 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12473 {
12474 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12475 (__v16sf) __W,
12476 (__mmask16) __U,
12477 _MM_FROUND_CUR_DIRECTION);
12478 }
12479
12480 extern __inline __m512
12481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12482 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12483 {
12484 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12485 (__v16sf)
12486 _mm512_setzero_ps (),
12487 (__mmask16) __U,
12488 _MM_FROUND_CUR_DIRECTION);
12489 }
12490
12491 extern __inline __m512d
12492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12493 _mm512_getexp_pd (__m512d __A)
12494 {
12495 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12496 (__v8df)
12497 _mm512_setzero_pd (),
12498 (__mmask8) -1,
12499 _MM_FROUND_CUR_DIRECTION);
12500 }
12501
12502 extern __inline __m512d
12503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12504 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12505 {
12506 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12507 (__v8df) __W,
12508 (__mmask8) __U,
12509 _MM_FROUND_CUR_DIRECTION);
12510 }
12511
12512 extern __inline __m512d
12513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12515 {
12516 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12517 (__v8df)
12518 _mm512_setzero_pd (),
12519 (__mmask8) __U,
12520 _MM_FROUND_CUR_DIRECTION);
12521 }
12522
12523 extern __inline __m128
12524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525 _mm_getexp_ss (__m128 __A, __m128 __B)
12526 {
12527 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12528 (__v4sf) __B,
12529 _MM_FROUND_CUR_DIRECTION);
12530 }
12531
12532 extern __inline __m128d
12533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12534 _mm_getexp_sd (__m128d __A, __m128d __B)
12535 {
12536 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12537 (__v2df) __B,
12538 _MM_FROUND_CUR_DIRECTION);
12539 }
12540
12541 extern __inline __m512d
12542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12543 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12544 _MM_MANTISSA_SIGN_ENUM __C)
12545 {
12546 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12547 (__C << 2) | __B,
12548 _mm512_setzero_pd (),
12549 (__mmask8) -1,
12550 _MM_FROUND_CUR_DIRECTION);
12551 }
12552
12553 extern __inline __m512d
12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12555 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12556 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12557 {
12558 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12559 (__C << 2) | __B,
12560 (__v8df) __W, __U,
12561 _MM_FROUND_CUR_DIRECTION);
12562 }
12563
12564 extern __inline __m512d
12565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12566 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12567 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12568 {
12569 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12570 (__C << 2) | __B,
12571 (__v8df)
12572 _mm512_setzero_pd (),
12573 __U,
12574 _MM_FROUND_CUR_DIRECTION);
12575 }
12576
12577 extern __inline __m512
12578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12579 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12580 _MM_MANTISSA_SIGN_ENUM __C)
12581 {
12582 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12583 (__C << 2) | __B,
12584 _mm512_setzero_ps (),
12585 (__mmask16) -1,
12586 _MM_FROUND_CUR_DIRECTION);
12587 }
12588
12589 extern __inline __m512
12590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12591 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12592 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12593 {
12594 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12595 (__C << 2) | __B,
12596 (__v16sf) __W, __U,
12597 _MM_FROUND_CUR_DIRECTION);
12598 }
12599
12600 extern __inline __m512
12601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12602 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12603 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12604 {
12605 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12606 (__C << 2) | __B,
12607 (__v16sf)
12608 _mm512_setzero_ps (),
12609 __U,
12610 _MM_FROUND_CUR_DIRECTION);
12611 }
12612
12613 extern __inline __m128d
12614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12616 _MM_MANTISSA_SIGN_ENUM __D)
12617 {
12618 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12619 (__v2df) __B,
12620 (__D << 2) | __C,
12621 _MM_FROUND_CUR_DIRECTION);
12622 }
12623
12624 extern __inline __m128
12625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12626 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12627 _MM_MANTISSA_SIGN_ENUM __D)
12628 {
12629 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12630 (__v4sf) __B,
12631 (__D << 2) | __C,
12632 _MM_FROUND_CUR_DIRECTION);
12633 }
12634
12635 #else
12636 #define _mm512_getmant_pd(X, B, C) \
12637 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12638 (int)(((C)<<2) | (B)), \
12639 (__v8df)(__m512d)_mm512_setzero_pd(), \
12640 (__mmask8)-1,\
12641 _MM_FROUND_CUR_DIRECTION))
12642
12643 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
12644 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12645 (int)(((C)<<2) | (B)), \
12646 (__v8df)(__m512d)(W), \
12647 (__mmask8)(U),\
12648 _MM_FROUND_CUR_DIRECTION))
12649
12650 #define _mm512_maskz_getmant_pd(U, X, B, C) \
12651 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12652 (int)(((C)<<2) | (B)), \
12653 (__v8df)(__m512d)_mm512_setzero_pd(), \
12654 (__mmask8)(U),\
12655 _MM_FROUND_CUR_DIRECTION))
12656 #define _mm512_getmant_ps(X, B, C) \
12657 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12658 (int)(((C)<<2) | (B)), \
12659 (__v16sf)(__m512)_mm512_setzero_ps(), \
12660 (__mmask16)-1,\
12661 _MM_FROUND_CUR_DIRECTION))
12662
12663 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
12664 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12665 (int)(((C)<<2) | (B)), \
12666 (__v16sf)(__m512)(W), \
12667 (__mmask16)(U),\
12668 _MM_FROUND_CUR_DIRECTION))
12669
12670 #define _mm512_maskz_getmant_ps(U, X, B, C) \
12671 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12672 (int)(((C)<<2) | (B)), \
12673 (__v16sf)(__m512)_mm512_setzero_ps(), \
12674 (__mmask16)(U),\
12675 _MM_FROUND_CUR_DIRECTION))
12676 #define _mm_getmant_sd(X, Y, C, D) \
12677 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
12678 (__v2df)(__m128d)(Y), \
12679 (int)(((D)<<2) | (C)), \
12680 _MM_FROUND_CUR_DIRECTION))
12681
12682 #define _mm_getmant_ss(X, Y, C, D) \
12683 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
12684 (__v4sf)(__m128)(Y), \
12685 (int)(((D)<<2) | (C)), \
12686 _MM_FROUND_CUR_DIRECTION))
12687
12688 #define _mm_getexp_ss(A, B) \
12689 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
12690 _MM_FROUND_CUR_DIRECTION))
12691
12692 #define _mm_getexp_sd(A, B) \
12693 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
12694 _MM_FROUND_CUR_DIRECTION))
12695
12696 #define _mm512_getexp_ps(A) \
12697 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12698 (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
12699
12700 #define _mm512_mask_getexp_ps(W, U, A) \
12701 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12702 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12703
12704 #define _mm512_maskz_getexp_ps(U, A) \
12705 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12706 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12707
12708 #define _mm512_getexp_pd(A) \
12709 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12710 (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
12711
12712 #define _mm512_mask_getexp_pd(W, U, A) \
12713 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12714 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12715
12716 #define _mm512_maskz_getexp_pd(U, A) \
12717 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12718 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12719 #endif
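
/* Illustrative sketch; the helper name is hypothetical and the
   _MM_MANT_NORM_1_2 / _MM_MANT_SIGN_src enumerators are assumed to be the
   ones defined earlier in this header.  For finite non-zero inputs, the
   original value equals the getmant result (mantissa normalised to [1, 2),
   sign taken from the source) scaled by two raised to the getexp result.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_mantissa_1_2 (__m512d __x)
{
  /* Mantissas normalised into [1, 2), keeping the sign of the source.  */
  return _mm512_getmant_pd (__x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}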
12720
12721 #ifdef __OPTIMIZE__
12722 extern __inline __m512
12723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12724 _mm512_roundscale_ps (__m512 __A, const int __imm)
12725 {
12726 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12727 (__v16sf) __A, -1,
12728 _MM_FROUND_CUR_DIRECTION);
12729 }
12730
12731 extern __inline __m512
12732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12733 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12734 const int __imm)
12735 {
12736 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12737 (__v16sf) __A,
12738 (__mmask16) __B,
12739 _MM_FROUND_CUR_DIRECTION);
12740 }
12741
12742 extern __inline __m512
12743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12745 {
12746 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12747 __imm,
12748 (__v16sf)
12749 _mm512_setzero_ps (),
12750 (__mmask16) __A,
12751 _MM_FROUND_CUR_DIRECTION);
12752 }
12753
12754 extern __inline __m512d
12755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12756 _mm512_roundscale_pd (__m512d __A, const int __imm)
12757 {
12758 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12759 (__v8df) __A, -1,
12760 _MM_FROUND_CUR_DIRECTION);
12761 }
12762
12763 extern __inline __m512d
12764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12765 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12766 const int __imm)
12767 {
12768 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12769 (__v8df) __A,
12770 (__mmask8) __B,
12771 _MM_FROUND_CUR_DIRECTION);
12772 }
12773
12774 extern __inline __m512d
12775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12776 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12777 {
12778 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12779 __imm,
12780 (__v8df)
12781 _mm512_setzero_pd (),
12782 (__mmask8) __A,
12783 _MM_FROUND_CUR_DIRECTION);
12784 }
12785
12786 extern __inline __m128
12787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12788 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12789 {
12790 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12791 (__v4sf) __B, __imm,
12792 _MM_FROUND_CUR_DIRECTION);
12793 }
12794
12795 extern __inline __m128d
12796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12797 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12798 {
12799 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12800 (__v2df) __B, __imm,
12801 _MM_FROUND_CUR_DIRECTION);
12802 }
12803
12804 #else
12805 #define _mm512_roundscale_ps(A, B) \
12806 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
12807 (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12808 #define _mm512_mask_roundscale_ps(A, B, C, D) \
12809 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
12810 (int)(D), \
12811 (__v16sf)(__m512)(A), \
12812 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
12813 #define _mm512_maskz_roundscale_ps(A, B, C) \
12814 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
12815 (int)(C), \
12816 (__v16sf)_mm512_setzero_ps(),\
12817 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
12818 #define _mm512_roundscale_pd(A, B) \
12819 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
12820 (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12821 #define _mm512_mask_roundscale_pd(A, B, C, D) \
12822 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
12823 (int)(D), \
12824 (__v8df)(__m512d)(A), \
12825 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
12826 #define _mm512_maskz_roundscale_pd(A, B, C) \
12827 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
12828 (int)(C), \
12829 (__v8df)_mm512_setzero_pd(),\
12830 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
12831 #define _mm_roundscale_ss(A, B, C) \
12832 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
12833 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12834 #define _mm_roundscale_sd(A, B, C) \
12835 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
12836 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12837 #endif
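
/* Illustrative sketch; the helper name is hypothetical.  VRNDSCALE rounds to
   a multiple of 2^-M, where M comes from bits 7:4 of the immediate and the
   low bits select the rounding behaviour; an immediate of 0 therefore rounds
   every element to an integral value.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_round_to_integral_ps (__m512 __x)
{
  /* imm = 0: M = 0 (scale 1) with round-to-nearest-even control bits.  */
  return _mm512_roundscale_ps (__x, 0);
}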
12838
12839 #ifdef __OPTIMIZE__
12840 extern __inline __mmask8
12841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12842 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12843 {
12844 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12845 (__v8df) __Y, __P,
12846 (__mmask8) -1,
12847 _MM_FROUND_CUR_DIRECTION);
12848 }
12849
12850 extern __inline __mmask16
12851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12852 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12853 {
12854 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12855 (__v16sf) __Y, __P,
12856 (__mmask16) -1,
12857 _MM_FROUND_CUR_DIRECTION);
12858 }
12859
12860 extern __inline __mmask16
12861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12862 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12863 {
12864 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12865 (__v16sf) __Y, __P,
12866 (__mmask16) __U,
12867 _MM_FROUND_CUR_DIRECTION);
12868 }
12869
12870 extern __inline __mmask8
12871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12872 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12873 {
12874 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12875 (__v8df) __Y, __P,
12876 (__mmask8) __U,
12877 _MM_FROUND_CUR_DIRECTION);
12878 }
12879
12880 extern __inline __mmask8
12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12883 {
12884 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12885 (__v2df) __Y, __P,
12886 (__mmask8) -1,
12887 _MM_FROUND_CUR_DIRECTION);
12888 }
12889
12890 extern __inline __mmask8
12891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12892 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12893 {
12894 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12895 (__v2df) __Y, __P,
12896 (__mmask8) __M,
12897 _MM_FROUND_CUR_DIRECTION);
12898 }
12899
12900 extern __inline __mmask8
12901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12902 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12903 {
12904 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12905 (__v4sf) __Y, __P,
12906 (__mmask8) -1,
12907 _MM_FROUND_CUR_DIRECTION);
12908 }
12909
12910 extern __inline __mmask8
12911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12912 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12913 {
12914 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12915 (__v4sf) __Y, __P,
12916 (__mmask8) __M,
12917 _MM_FROUND_CUR_DIRECTION);
12918 }
12919
12920 #else
12921 #define _mm512_cmp_pd_mask(X, Y, P) \
12922 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12923 (__v8df)(__m512d)(Y), (int)(P),\
12924 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12925
12926 #define _mm512_cmp_ps_mask(X, Y, P) \
12927 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12928 (__v16sf)(__m512)(Y), (int)(P),\
12929 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
12930
12931 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
12932 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12933 (__v8df)(__m512d)(Y), (int)(P),\
12934 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
12935
12936 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
12937 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12938 (__v16sf)(__m512)(Y), (int)(P),\
12939 (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
12940
12941 #define _mm_cmp_sd_mask(X, Y, P) \
12942 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12943 (__v2df)(__m128d)(Y), (int)(P),\
12944 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12945
12946 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
12947 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12948 (__v2df)(__m128d)(Y), (int)(P),\
12949 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
12950
12951 #define _mm_cmp_ss_mask(X, Y, P) \
12952 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12953 (__v4sf)(__m128)(Y), (int)(P), \
12954 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12955
12956 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
12957 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12958 (__v4sf)(__m128)(Y), (int)(P), \
12959 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
12960 #endif
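
/* Illustrative sketch; the helper name is hypothetical and _CMP_LT_OS is
   assumed to be the ordered, signalling less-than predicate provided by
   <avxintrin.h> via <immintrin.h>.  The comparison yields one mask bit per
   double lane, which can then drive any of the masked operations above.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_lanes_below (__m512d __x, __m512d __limit)
{
  /* Bit i is set when __x[i] < __limit[i]; unordered pairs produce 0.  */
  return _mm512_cmp_pd_mask (__x, __limit, _CMP_LT_OS);
}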
12961
12962 extern __inline __mmask16
12963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964 _mm512_kmov (__mmask16 __A)
12965 {
12966 return __builtin_ia32_kmov16 (__A);
12967 }
12968
12969 #ifdef __DISABLE_AVX512F__
12970 #undef __DISABLE_AVX512F__
12971 #pragma GCC pop_options
12972 #endif /* __DISABLE_AVX512F__ */
12973
12974 #endif /* _AVX512FINTRIN_H_INCLUDED */