]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512fintrin.h
re PR target/80322 (convert intrinsics missing)
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
/* Internal 64-byte vector types used to implement the intrinsics.
   They are grouped by element type; each signed integer flavour is
   followed by its unsigned counterpart.  */

/* Floating-point elements.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* Integer elements, widest first.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
/* Public 64-byte vector types.  They carry __may_alias__ because the
   Intel API is flexible enough that aliasing with other vector types,
   and with their scalar components, has to be allowed.  */
typedef float __m512
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d
  __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Variants of the public vector types declared with __aligned__ (1),
   i.e. without the alignment requirement of the plain types.  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
/* Mask types: plain unsigned integers, an unsigned char for __mmask8
   and an unsigned short for __mmask16.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
63 extern __inline __mmask16
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_int2mask (int __M)
66 {
67 return (__mmask16) __M;
68 }
69
70 extern __inline int
71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72 _mm512_mask2int (__mmask16 __M)
73 {
74 return (int) __M;
75 }
76
77 extern __inline __m512i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82 {
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85 }
86
87 /* Create the vector [A B C D E F G H I J K L M N O P]. */
88 extern __inline __m512i
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94 {
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98 }
99
100 extern __inline __m512d
101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102 _mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
104 {
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
107 }
108
109 extern __inline __m512
110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111 _mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
115 {
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
119 }
120
/* "Reversed" constructors: each macro hands its arguments to the
   matching _mm512_set_* function in the opposite order.  */
#define _mm512_setr_epi64(a0, a1, a2, a3, a4, a5, a6, a7) \
  _mm512_set_epi64 (a7, a6, a5, a4, a3, a2, a1, a0)

#define _mm512_setr_epi32(a0, a1, a2, a3, a4, a5, a6, a7, \
                          a8, a9, a10, a11, a12, a13, a14, a15) \
  _mm512_set_epi32 (a15, a14, a13, a12, a11, a10, a9, a8, \
                    a7, a6, a5, a4, a3, a2, a1, a0)

#define _mm512_setr_pd(a0, a1, a2, a3, a4, a5, a6, a7) \
  _mm512_set_pd (a7, a6, a5, a4, a3, a2, a1, a0)

#define _mm512_setr_ps(a0, a1, a2, a3, a4, a5, a6, a7, \
                       a8, a9, a10, a11, a12, a13, a14, a15) \
  _mm512_set_ps (a15, a14, a13, a12, a11, a10, a9, a8, \
                 a7, a6, a5, a4, a3, a2, a1, a0)
133
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_undefined_ps (void)
137 {
138 __m512 __Y = __Y;
139 return __Y;
140 }
141
/* _mm512_undefined is another name for _mm512_undefined_ps.  */
#define _mm512_undefined _mm512_undefined_ps
143
144 extern __inline __m512d
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_undefined_pd (void)
147 {
148 __m512d __Y = __Y;
149 return __Y;
150 }
151
152 extern __inline __m512i
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm512_undefined_epi32 (void)
155 {
156 __m512i __Y = __Y;
157 return __Y;
158 }
159
/* _mm512_undefined_si512 is another name for _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
161
162 extern __inline __m512i
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm512_set1_epi8 (char __A)
165 {
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
175 }
176
177 extern __inline __m512i
178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179 _mm512_set1_epi16 (short __A)
180 {
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
186 }
187
188 extern __inline __m512d
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set1_pd (double __A)
191 {
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
197 }
198
199 extern __inline __m512
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set1_ps (float __A)
202 {
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
208 }
209
210 /* Create the vector [A B C D A B C D A B C D A B C D]. */
211 extern __inline __m512i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214 {
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
218 }
219
220 extern __inline __m512i
221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
224 {
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
227 }
228
229 extern __inline __m512d
230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231 _mm512_set4_pd (double __A, double __B, double __C, double __D)
232 {
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
235 }
236
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_set4_ps (float __A, float __B, float __C, float __D)
240 {
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
244 }
245
/* "Reversed" four-value constructors: each macro hands its arguments to
   the matching _mm512_set4_* function in the opposite order.  */
#define _mm512_setr4_epi64(a0, a1, a2, a3) \
  _mm512_set4_epi64 (a3, a2, a1, a0)

#define _mm512_setr4_epi32(a0, a1, a2, a3) \
  _mm512_set4_epi32 (a3, a2, a1, a0)

#define _mm512_setr4_pd(a0, a1, a2, a3) \
  _mm512_set4_pd (a3, a2, a1, a0)

#define _mm512_setr4_ps(a0, a1, a2, a3) \
  _mm512_set4_ps (a3, a2, a1, a0)
257
258 extern __inline __m512
259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260 _mm512_setzero_ps (void)
261 {
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264 }
265
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_setzero_pd (void)
269 {
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271 }
272
273 extern __inline __m512i
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_setzero_epi32 (void)
276 {
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278 }
279
280 extern __inline __m512i
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_setzero_si512 (void)
283 {
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285 }
286
287 extern __inline __m512d
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290 {
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
294 }
295
296 extern __inline __m512d
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299 {
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
304 }
305
306 extern __inline __m512
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309 {
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
313 }
314
315 extern __inline __m512
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318 {
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
323 }
324
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_load_pd (void const *__P)
328 {
329 return *(__m512d *) __P;
330 }
331
332 extern __inline __m512d
333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335 {
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
339 }
340
341 extern __inline __m512d
342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344 {
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
349 }
350
351 extern __inline void
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353 _mm512_store_pd (void *__P, __m512d __A)
354 {
355 *(__m512d *) __P = __A;
356 }
357
358 extern __inline void
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361 {
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
364 }
365
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_load_ps (void const *__P)
369 {
370 return *(__m512 *) __P;
371 }
372
373 extern __inline __m512
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376 {
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
380 }
381
382 extern __inline __m512
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385 {
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
390 }
391
392 extern __inline void
393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394 _mm512_store_ps (void *__P, __m512 __A)
395 {
396 *(__m512 *) __P = __A;
397 }
398
399 extern __inline void
400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402 {
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
405 }
406
407 extern __inline __m512i
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410 {
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
414 }
415
416 extern __inline __m512i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419 {
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
424 }
425
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_load_epi64 (void const *__P)
429 {
430 return *(__m512i *) __P;
431 }
432
433 extern __inline __m512i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436 {
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
440 }
441
442 extern __inline __m512i
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445 {
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
450 }
451
452 extern __inline void
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm512_store_epi64 (void *__P, __m512i __A)
455 {
456 *(__m512i *) __P = __A;
457 }
458
459 extern __inline void
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462 {
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
465 }
466
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470 {
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
474 }
475
476 extern __inline __m512i
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479 {
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
484 }
485
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_load_si512 (void const *__P)
489 {
490 return *(__m512i *) __P;
491 }
492
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_load_epi32 (void const *__P)
496 {
497 return *(__m512i *) __P;
498 }
499
500 extern __inline __m512i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503 {
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
507 }
508
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512 {
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
517 }
518
519 extern __inline void
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm512_store_si512 (void *__P, __m512i __A)
522 {
523 *(__m512i *) __P = __A;
524 }
525
526 extern __inline void
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm512_store_epi32 (void *__P, __m512i __A)
529 {
530 *(__m512i *) __P = __A;
531 }
532
533 extern __inline void
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536 {
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
539 }
540
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
544 {
545 return (__m512i) ((__v16su) __A * (__v16su) __B);
546 }
547
548 extern __inline __m512i
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551 {
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
557 }
558
559 extern __inline __m512i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562 {
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
566 }
567
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571 {
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
575 _mm512_undefined_epi32 (),
576 (__mmask16) -1);
577 }
578
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582 {
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
587 }
588
589 extern __inline __m512i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592 {
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
598 }
599
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
603 {
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
607 _mm512_undefined_epi32 (),
608 (__mmask16) -1);
609 }
610
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614 {
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
619 }
620
621 extern __inline __m512i
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624 {
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
630 }
631
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635 {
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
639 _mm512_undefined_epi32 (),
640 (__mmask16) -1);
641 }
642
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646 {
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
651 }
652
653 extern __inline __m512i
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656 {
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
662 }
663
664 extern __inline __m512i
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm512_add_epi64 (__m512i __A, __m512i __B)
667 {
668 return (__m512i) ((__v8du) __A + (__v8du) __B);
669 }
670
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674 {
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684 {
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
690 }
691
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_sub_epi64 (__m512i __A, __m512i __B)
695 {
696 return (__m512i) ((__v8du) __A - (__v8du) __B);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702 {
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712 {
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723 {
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
727 _mm512_undefined_pd (),
728 (__mmask8) -1);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734 {
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744 {
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
755 {
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
759 _mm512_undefined_epi32 (),
760 (__mmask8) -1);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766 {
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776 {
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787 {
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
791 _mm512_undefined_epi32 (),
792 (__mmask8) -1);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798 {
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808 {
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_add_epi32 (__m512i __A, __m512i __B)
819 {
820 return (__m512i) ((__v16su) __A + (__v16su) __B);
821 }
822
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826 {
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
831 }
832
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836 {
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
842 }
843
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
847 {
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
851 _mm512_undefined_epi32 (),
852 (__mmask8) -1);
853 }
854
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858 {
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
862 }
863
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867 {
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
873 }
874
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_sub_epi32 (__m512i __A, __m512i __B)
878 {
879 return (__m512i) ((__v16su) __A - (__v16su) __B);
880 }
881
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885 {
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
890 }
891
892 extern __inline __m512i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895 {
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
901 }
902
903 extern __inline __m512i
904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
906 {
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
910 _mm512_undefined_epi32 (),
911 (__mmask8) -1);
912 }
913
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917 {
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926 {
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
932 }
933
934 #ifdef __OPTIMIZE__
935 extern __inline __m512i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
938 {
939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 (__v8di)
941 _mm512_undefined_epi32 (),
942 (__mmask8) -1);
943 }
944
945 extern __inline __m512i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948 unsigned int __B)
949 {
950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951 (__v8di) __W,
952 (__mmask8) __U);
953 }
954
955 extern __inline __m512i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
958 {
959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960 (__v8di)
961 _mm512_setzero_si512 (),
962 (__mmask8) __U);
963 }
964 #else
/* Macro forms of the _mm512_*slli_epi64 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to a call of
   __builtin_ia32_psllqi512_mask with the count converted to int.  */
#define _mm512_slli_epi64(V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
    (int)(N), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
    (int)(N), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
    (int)(N), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
979 #endif
980
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_sll_epi64 (__m512i __A, __m128i __B)
984 {
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
988 _mm512_undefined_epi32 (),
989 (__mmask8) -1);
990 }
991
992 extern __inline __m512i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995 {
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1000 }
1001
1002 extern __inline __m512i
1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005 {
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1011 }
1012
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined; the #else branch provides
   macros with the same names that expand to the same builtin calls.  */

/* Logical right shift of the 64-bit elements of __A by the count __B
   (psrlqi512 builtin).  Unmasked: all-ones mask, undefined pass-through.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* Masked form: __W is forwarded as the pass-through vector and __U as the
   mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __data = (__v8di) __A;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__data, __B,
                                                  (__v8di) __W, __U);
}

/* Zeroing form: a zeroed vector is forwarded as the pass-through and __U as
   the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __zero,
                                                  __U);
}
#else
/* Macro forms of the three intrinsics above; X is the data vector and C the
   shift count.  */
#define _mm512_srli_epi64(X, C)                                         \
  ((__m512i) __builtin_ia32_psrlqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psrlqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)                                \
  ((__m512i) __builtin_ia32_psrlqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
#endif
1059
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1063 {
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
1067 _mm512_undefined_epi32 (),
1068 (__mmask8) -1);
1069 }
1070
1071 extern __inline __m512i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074 {
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1079 }
1080
1081 extern __inline __m512i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084 {
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1090 }
1091
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined; the #else branch provides
   macros with the same names that expand to the same builtin calls.  */

/* Arithmetic right shift of the 64-bit elements of __A by the count __B
   (psraqi512 builtin).  Unmasked: all-ones mask, undefined pass-through.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* Masked form: __W is forwarded as the pass-through vector and __U as the
   mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __data = (__v8di) __A;

  return (__m512i) __builtin_ia32_psraqi512_mask (__data, __B,
                                                  (__v8di) __W, __U);
}

/* Zeroing form: a zeroed vector is forwarded as the pass-through and __U as
   the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __zero,
                                                  __U);
}
#else
/* Macro forms of the three intrinsics above; X is the data vector and C the
   shift count.  */
#define _mm512_srai_epi64(X, C)                                         \
  ((__m512i) __builtin_ia32_psraqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psraqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)                                \
  ((__m512i) __builtin_ia32_psraqi512_mask (                            \
     (__v8di)(__m512i)(X), (int)(C),                                    \
     (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
#endif
1138
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1142 {
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
1146 _mm512_undefined_epi32 (),
1147 (__mmask8) -1);
1148 }
1149
1150 extern __inline __m512i
1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153 {
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1158 }
1159
1160 extern __inline __m512i
1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163 {
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1169 }
1170
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined; the #else branch provides
   macros with the same names that expand to the same builtin calls.  */

/* Left shift of the 32-bit elements of __A by the count __B (pslldi512
   builtin).  Unmasked: all-ones mask, undefined pass-through vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is forwarded as the pass-through vector and __U as the
   mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __data = (__v16si) __A;

  return (__m512i) __builtin_ia32_pslldi512_mask (__data, __B,
                                                  (__v16si) __W, __U);
}

/* Zeroing form: a zeroed vector is forwarded as the pass-through and __U as
   the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __zero,
                                                  __U);
}
#else
/* Macro forms of the three intrinsics above; X is the data vector and C the
   shift count.  */
#define _mm512_slli_epi32(X, C)                                         \
  ((__m512i) __builtin_ia32_pslldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_pslldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C)                                \
  ((__m512i) __builtin_ia32_pslldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1217
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1221 {
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
1225 _mm512_undefined_epi32 (),
1226 (__mmask16) -1);
1227 }
1228
1229 extern __inline __m512i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232 {
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1237 }
1238
1239 extern __inline __m512i
1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242 {
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1248 }
1249
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined; the #else branch provides
   macros with the same names that expand to the same builtin calls.  */

/* Logical right shift of the 32-bit elements of __A by the count __B
   (psrldi512 builtin).  Unmasked: all-ones mask, undefined pass-through.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is forwarded as the pass-through vector and __U as the
   mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  const __v16si __data = (__v16si) __A;

  return (__m512i) __builtin_ia32_psrldi512_mask (__data, __B,
                                                  (__v16si) __W, __U);
}

/* Zeroing form: a zeroed vector is forwarded as the pass-through and __U as
   the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __zero,
                                                  __U);
}
#else
/* Macro forms of the three intrinsics above; X is the data vector and C the
   shift count.  */
#define _mm512_srli_epi32(X, C)                                         \
  ((__m512i) __builtin_ia32_psrldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psrldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C)                                \
  ((__m512i) __builtin_ia32_psrldi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1296
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1300 {
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
1304 _mm512_undefined_epi32 (),
1305 (__mmask16) -1);
1306 }
1307
1308 extern __inline __m512i
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311 {
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1316 }
1317
1318 extern __inline __m512i
1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321 {
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1327 }
1328
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined; the #else branch provides
   macros with the same names that expand to the same builtin calls.  */

/* Arithmetic right shift of the 32-bit elements of __A by the count __B
   (psradi512 builtin).  Unmasked: all-ones mask, undefined pass-through.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is forwarded as the pass-through vector and __U as the
   mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __data = (__v16si) __A;

  return (__m512i) __builtin_ia32_psradi512_mask (__data, __B,
                                                  (__v16si) __W, __U);
}

/* Zeroing form: a zeroed vector is forwarded as the pass-through and __U as
   the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __zero,
                                                  __U);
}
#else
/* Macro forms of the three intrinsics above; X is the data vector and C the
   shift count.  */
#define _mm512_srai_epi32(X, C)                                         \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)                                \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1375
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1379 {
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
1383 _mm512_undefined_epi32 (),
1384 (__mmask16) -1);
1385 }
1386
1387 extern __inline __m512i
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390 {
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1395 }
1396
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400 {
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1406 }
1407
#ifdef __OPTIMIZE__
/* Scalar add/subtract with an explicit rounding argument __R.  Inline forms,
   used when __OPTIMIZE__ is defined; the #else branch provides macros with
   the same names.  Each wrapper forwards __A, __B and __R, in that order, to
   the corresponding *_round builtin.  */

/* Wrapper for the addsd_round builtin (double-precision scalar add).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

/* Wrapper for the addss_round builtin (single-precision scalar add).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

/* Wrapper for the subsd_round builtin (double-precision scalar subtract).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

/* Wrapper for the subss_round builtin (single-precision scalar subtract).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

#else
/* Macro forms of the intrinsics above.  Every expansion is wrapped in an
   outer pair of parentheses so that a postfix operator applied to the
   invocation (subscript, member access) binds to the whole result rather
   than to the builtin call underneath the cast.  Arguments are parenthesized
   and cast exactly as the inline versions cast their parameters.  */
#define _mm_add_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A),         \
                                         (__v2df)(__m128d)(B),         \
                                         (int)(C)))

#define _mm_add_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A),           \
                                        (__v4sf)(__m128)(B),           \
                                        (int)(C)))

#define _mm_sub_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A),         \
                                         (__v2df)(__m128d)(B),         \
                                         (int)(C)))

#define _mm_sub_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A),           \
                                        (__v4sf)(__m128)(B),           \
                                        (int)(C)))
#endif
1458
#ifdef __OPTIMIZE__
/* Three-operand bitwise logic selected by the constant __imm.  Inline forms,
   used when __OPTIMIZE__ is defined; the #else branch provides macros with
   the same names.  The plain and mask_ variants call the *_mask builtin, the
   maskz_ variants call the *_maskz builtin.  */

/* 64-bit element form with an all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_mask ((__v8di) __A, (__v8di) __B,
                                      (__v8di) __C, __imm, (__mmask8) -1);
}

/* 64-bit element form with mask __U; note that __A precedes __U in the
   parameter list.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_mask ((__v8di) __A, (__v8di) __B,
                                      (__v8di) __C, __imm, __U);
}

/* 64-bit element form with mask __U, using the maskz builtin.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_maskz ((__v8di) __A, (__v8di) __B,
                                       (__v8di) __C, __imm, __U);
}

/* 32-bit element form with an all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_mask ((__v16si) __A, (__v16si) __B,
                                      (__v16si) __C, __imm, (__mmask16) -1);
}

/* 32-bit element form with mask __U; note that __A precedes __U in the
   parameter list.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_mask ((__v16si) __A, (__v16si) __B,
                                      (__v16si) __C, __imm, __U);
}

/* 32-bit element form with mask __U, using the maskz builtin.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_maskz ((__v16si) __A, (__v16si) __B,
                                       (__v16si) __C, __imm, __U);
}
#else
/* Macro forms of the six intrinsics above; I is the logic selector.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)(U)))

#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))
#endif
1548
1549 extern __inline __m512d
1550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1551 _mm512_rcp14_pd (__m512d __A)
1552 {
1553 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1554 (__v8df)
1555 _mm512_undefined_pd (),
1556 (__mmask8) -1);
1557 }
1558
1559 extern __inline __m512d
1560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1562 {
1563 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1564 (__v8df) __W,
1565 (__mmask8) __U);
1566 }
1567
1568 extern __inline __m512d
1569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1571 {
1572 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1573 (__v8df)
1574 _mm512_setzero_pd (),
1575 (__mmask8) __U);
1576 }
1577
1578 extern __inline __m512
1579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1580 _mm512_rcp14_ps (__m512 __A)
1581 {
1582 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1583 (__v16sf)
1584 _mm512_undefined_ps (),
1585 (__mmask16) -1);
1586 }
1587
1588 extern __inline __m512
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1591 {
1592 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1593 (__v16sf) __W,
1594 (__mmask16) __U);
1595 }
1596
1597 extern __inline __m512
1598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1599 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1600 {
1601 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1602 (__v16sf)
1603 _mm512_setzero_ps (),
1604 (__mmask16) __U);
1605 }
1606
1607 extern __inline __m128d
1608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609 _mm_rcp14_sd (__m128d __A, __m128d __B)
1610 {
1611 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1612 (__v2df) __A);
1613 }
1614
1615 extern __inline __m128
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617 _mm_rcp14_ss (__m128 __A, __m128 __B)
1618 {
1619 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1620 (__v4sf) __A);
1621 }
1622
1623 extern __inline __m512d
1624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1625 _mm512_rsqrt14_pd (__m512d __A)
1626 {
1627 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1628 (__v8df)
1629 _mm512_undefined_pd (),
1630 (__mmask8) -1);
1631 }
1632
1633 extern __inline __m512d
1634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1635 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1636 {
1637 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1638 (__v8df) __W,
1639 (__mmask8) __U);
1640 }
1641
1642 extern __inline __m512d
1643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1644 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1645 {
1646 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1647 (__v8df)
1648 _mm512_setzero_pd (),
1649 (__mmask8) __U);
1650 }
1651
1652 extern __inline __m512
1653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1654 _mm512_rsqrt14_ps (__m512 __A)
1655 {
1656 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1657 (__v16sf)
1658 _mm512_undefined_ps (),
1659 (__mmask16) -1);
1660 }
1661
1662 extern __inline __m512
1663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1665 {
1666 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1667 (__v16sf) __W,
1668 (__mmask16) __U);
1669 }
1670
1671 extern __inline __m512
1672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1673 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1674 {
1675 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1676 (__v16sf)
1677 _mm512_setzero_ps (),
1678 (__mmask16) __U);
1679 }
1680
1681 extern __inline __m128d
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1684 {
1685 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1686 (__v2df) __A);
1687 }
1688
1689 extern __inline __m128
1690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1691 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1692 {
1693 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1694 (__v4sf) __A);
1695 }
1696
#ifdef __OPTIMIZE__
/* Square root with an explicit rounding argument __R.  Inline forms, used
   when __OPTIMIZE__ is defined; the #else branch provides macros with the
   same names that must expand to the same builtin calls.  */

/* Packed double-precision form.  Unmasked: all-ones mask, undefined
   pass-through vector.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df)
                                                  _mm512_undefined_pd (),
                                                  (__mmask8) -1, __R);
}

/* Packed double-precision form with __W as pass-through and __U as mask.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
                           const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) __U, __R);
}

/* Packed double-precision form with a zeroed pass-through and __U as
   mask.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df)
                                                  _mm512_setzero_pd (),
                                                  (__mmask8) __U, __R);
}

/* Packed single-precision form.  Unmasked: all-ones mask, undefined
   pass-through vector.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf)
                                                 _mm512_undefined_ps (),
                                                 (__mmask16) -1, __R);
}

/* Packed single-precision form with __W as pass-through and __U as mask.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
                           const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) __U, __R);
}

/* Packed single-precision form with a zeroed pass-through and __U as
   mask.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf)
                                                 _mm512_setzero_ps (),
                                                 (__mmask16) __U, __R);
}

/* Scalar double-precision form.  The builtin takes its vector operands in
   the opposite order to this intrinsic: __B first, then __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
                                                (__v2df) __A,
                                                __R);
}

/* Scalar single-precision form.  The builtin takes its vector operands in
   the opposite order to this intrinsic: __B first, then __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
                                               (__v4sf) __A,
                                               __R);
}
#else
/* Macro forms of the intrinsics above.  Each expansion is fully
   parenthesized and casts its arguments the same way the inline versions
   cast their parameters, so both branches issue identical builtin calls.  */
#define _mm512_sqrt_round_pd(A, C)                                      \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),      \
                                            (__v8df)_mm512_undefined_pd (), \
                                            (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C)                           \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),      \
                                            (__v8df)(__m512d)(W),      \
                                            (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C)                             \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),      \
                                            (__v8df)_mm512_setzero_pd (), \
                                            (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C)                                      \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),       \
                                           (__v16sf)_mm512_undefined_ps (), \
                                           (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C)                           \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),       \
                                           (__v16sf)(__m512)(W),       \
                                           (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C)                             \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),       \
                                           (__v16sf)_mm512_setzero_ps (), \
                                           (__mmask16)(U), (int)(C)))

/* The scalar forms hand B to the builtin before A, exactly as the inline
   functions in the other branch do; passing (A, B) here would make the
   result depend on whether __OPTIMIZE__ is defined.  */
#define _mm_sqrt_round_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B),        \
                                          (__v2df)(__m128d)(A),        \
                                          (int)(C)))

#define _mm_sqrt_round_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B),          \
                                         (__v4sf)(__m128)(A),          \
                                         (int)(C)))
#endif
1799
1800 extern __inline __m512i
1801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1802 _mm512_cvtepi8_epi32 (__m128i __A)
1803 {
1804 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1805 (__v16si)
1806 _mm512_undefined_epi32 (),
1807 (__mmask16) -1);
1808 }
1809
1810 extern __inline __m512i
1811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1812 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1813 {
1814 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1815 (__v16si) __W,
1816 (__mmask16) __U);
1817 }
1818
1819 extern __inline __m512i
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1822 {
1823 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1824 (__v16si)
1825 _mm512_setzero_si512 (),
1826 (__mmask16) __U);
1827 }
1828
1829 extern __inline __m512i
1830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1831 _mm512_cvtepi8_epi64 (__m128i __A)
1832 {
1833 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1834 (__v8di)
1835 _mm512_undefined_epi32 (),
1836 (__mmask8) -1);
1837 }
1838
1839 extern __inline __m512i
1840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1841 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1842 {
1843 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1844 (__v8di) __W,
1845 (__mmask8) __U);
1846 }
1847
1848 extern __inline __m512i
1849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1850 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1851 {
1852 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1853 (__v8di)
1854 _mm512_setzero_si512 (),
1855 (__mmask8) __U);
1856 }
1857
1858 extern __inline __m512i
1859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1860 _mm512_cvtepi16_epi32 (__m256i __A)
1861 {
1862 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1863 (__v16si)
1864 _mm512_undefined_epi32 (),
1865 (__mmask16) -1);
1866 }
1867
1868 extern __inline __m512i
1869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1870 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1871 {
1872 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1873 (__v16si) __W,
1874 (__mmask16) __U);
1875 }
1876
1877 extern __inline __m512i
1878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1879 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1880 {
1881 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1882 (__v16si)
1883 _mm512_setzero_si512 (),
1884 (__mmask16) __U);
1885 }
1886
1887 extern __inline __m512i
1888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889 _mm512_cvtepi16_epi64 (__m128i __A)
1890 {
1891 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1892 (__v8di)
1893 _mm512_undefined_epi32 (),
1894 (__mmask8) -1);
1895 }
1896
1897 extern __inline __m512i
1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1900 {
1901 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1902 (__v8di) __W,
1903 (__mmask8) __U);
1904 }
1905
1906 extern __inline __m512i
1907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1908 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1909 {
1910 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1911 (__v8di)
1912 _mm512_setzero_si512 (),
1913 (__mmask8) __U);
1914 }
1915
1916 extern __inline __m512i
1917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1918 _mm512_cvtepi32_epi64 (__m256i __X)
1919 {
1920 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1921 (__v8di)
1922 _mm512_undefined_epi32 (),
1923 (__mmask8) -1);
1924 }
1925
1926 extern __inline __m512i
1927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1928 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1929 {
1930 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1931 (__v8di) __W,
1932 (__mmask8) __U);
1933 }
1934
1935 extern __inline __m512i
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1938 {
1939 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1940 (__v8di)
1941 _mm512_setzero_si512 (),
1942 (__mmask8) __U);
1943 }
1944
1945 extern __inline __m512i
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947 _mm512_cvtepu8_epi32 (__m128i __A)
1948 {
1949 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1950 (__v16si)
1951 _mm512_undefined_epi32 (),
1952 (__mmask16) -1);
1953 }
1954
1955 extern __inline __m512i
1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1958 {
1959 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1960 (__v16si) __W,
1961 (__mmask16) __U);
1962 }
1963
1964 extern __inline __m512i
1965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1967 {
1968 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1969 (__v16si)
1970 _mm512_setzero_si512 (),
1971 (__mmask16) __U);
1972 }
1973
1974 extern __inline __m512i
1975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976 _mm512_cvtepu8_epi64 (__m128i __A)
1977 {
1978 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1979 (__v8di)
1980 _mm512_undefined_epi32 (),
1981 (__mmask8) -1);
1982 }
1983
1984 extern __inline __m512i
1985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1987 {
1988 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1989 (__v8di) __W,
1990 (__mmask8) __U);
1991 }
1992
1993 extern __inline __m512i
1994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1995 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1996 {
1997 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1998 (__v8di)
1999 _mm512_setzero_si512 (),
2000 (__mmask8) __U);
2001 }
2002
2003 extern __inline __m512i
2004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005 _mm512_cvtepu16_epi32 (__m256i __A)
2006 {
2007 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2008 (__v16si)
2009 _mm512_undefined_epi32 (),
2010 (__mmask16) -1);
2011 }
2012
2013 extern __inline __m512i
2014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2015 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2016 {
2017 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2018 (__v16si) __W,
2019 (__mmask16) __U);
2020 }
2021
2022 extern __inline __m512i
2023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2024 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2025 {
2026 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2027 (__v16si)
2028 _mm512_setzero_si512 (),
2029 (__mmask16) __U);
2030 }
2031
2032 extern __inline __m512i
2033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2034 _mm512_cvtepu16_epi64 (__m128i __A)
2035 {
2036 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2037 (__v8di)
2038 _mm512_undefined_epi32 (),
2039 (__mmask8) -1);
2040 }
2041
2042 extern __inline __m512i
2043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2045 {
2046 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2047 (__v8di) __W,
2048 (__mmask8) __U);
2049 }
2050
2051 extern __inline __m512i
2052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2053 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2054 {
2055 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2056 (__v8di)
2057 _mm512_setzero_si512 (),
2058 (__mmask8) __U);
2059 }
2060
2061 extern __inline __m512i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm512_cvtepu32_epi64 (__m256i __X)
2064 {
2065 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2066 (__v8di)
2067 _mm512_undefined_epi32 (),
2068 (__mmask8) -1);
2069 }
2070
2071 extern __inline __m512i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2074 {
2075 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2076 (__v8di) __W,
2077 (__mmask8) __U);
2078 }
2079
2080 extern __inline __m512i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2083 {
2084 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2085 (__v8di)
2086 _mm512_setzero_si512 (),
2087 (__mmask8) __U);
2088 }
2089
2090 #ifdef __OPTIMIZE__
2091 extern __inline __m512d
2092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2094 {
2095 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2096 (__v8df) __B,
2097 (__v8df)
2098 _mm512_undefined_pd (),
2099 (__mmask8) -1, __R);
2100 }
2101
2102 extern __inline __m512d
2103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2104 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2105 __m512d __B, const int __R)
2106 {
2107 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2108 (__v8df) __B,
2109 (__v8df) __W,
2110 (__mmask8) __U, __R);
2111 }
2112
2113 extern __inline __m512d
2114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2116 const int __R)
2117 {
2118 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2119 (__v8df) __B,
2120 (__v8df)
2121 _mm512_setzero_pd (),
2122 (__mmask8) __U, __R);
2123 }
2124
2125 extern __inline __m512
2126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2127 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2128 {
2129 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2130 (__v16sf) __B,
2131 (__v16sf)
2132 _mm512_undefined_ps (),
2133 (__mmask16) -1, __R);
2134 }
2135
2136 extern __inline __m512
2137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2138 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2139 __m512 __B, const int __R)
2140 {
2141 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2142 (__v16sf) __B,
2143 (__v16sf) __W,
2144 (__mmask16) __U, __R);
2145 }
2146
2147 extern __inline __m512
2148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2149 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2150 {
2151 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2152 (__v16sf) __B,
2153 (__v16sf)
2154 _mm512_setzero_ps (),
2155 (__mmask16) __U, __R);
2156 }
2157
2158 extern __inline __m512d
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2161 {
2162 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2163 (__v8df) __B,
2164 (__v8df)
2165 _mm512_undefined_pd (),
2166 (__mmask8) -1, __R);
2167 }
2168
2169 extern __inline __m512d
2170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2172 __m512d __B, const int __R)
2173 {
2174 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2175 (__v8df) __B,
2176 (__v8df) __W,
2177 (__mmask8) __U, __R);
2178 }
2179
2180 extern __inline __m512d
2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2183 const int __R)
2184 {
2185 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2186 (__v8df) __B,
2187 (__v8df)
2188 _mm512_setzero_pd (),
2189 (__mmask8) __U, __R);
2190 }
2191
2192 extern __inline __m512
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2195 {
2196 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2197 (__v16sf) __B,
2198 (__v16sf)
2199 _mm512_undefined_ps (),
2200 (__mmask16) -1, __R);
2201 }
2202
2203 extern __inline __m512
2204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2205 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2206 __m512 __B, const int __R)
2207 {
2208 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2209 (__v16sf) __B,
2210 (__v16sf) __W,
2211 (__mmask16) __U, __R);
2212 }
2213
2214 extern __inline __m512
2215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2217 {
2218 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2219 (__v16sf) __B,
2220 (__v16sf)
2221 _mm512_setzero_ps (),
2222 (__mmask16) __U, __R);
2223 }
2224 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_add_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2227
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))
2230
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2233
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_add_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2236
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))
2239
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2242
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_sub_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2245
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))
2248
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2251
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_sub_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2254
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))
2257
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2260 #endif
2261
2262 #ifdef __OPTIMIZE__
2263 extern __inline __m512d
2264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2265 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2266 {
2267 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2268 (__v8df) __B,
2269 (__v8df)
2270 _mm512_undefined_pd (),
2271 (__mmask8) -1, __R);
2272 }
2273
2274 extern __inline __m512d
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2277 __m512d __B, const int __R)
2278 {
2279 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2280 (__v8df) __B,
2281 (__v8df) __W,
2282 (__mmask8) __U, __R);
2283 }
2284
2285 extern __inline __m512d
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2288 const int __R)
2289 {
2290 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2291 (__v8df) __B,
2292 (__v8df)
2293 _mm512_setzero_pd (),
2294 (__mmask8) __U, __R);
2295 }
2296
2297 extern __inline __m512
2298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2299 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2300 {
2301 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2302 (__v16sf) __B,
2303 (__v16sf)
2304 _mm512_undefined_ps (),
2305 (__mmask16) -1, __R);
2306 }
2307
2308 extern __inline __m512
2309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2310 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2311 __m512 __B, const int __R)
2312 {
2313 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2314 (__v16sf) __B,
2315 (__v16sf) __W,
2316 (__mmask16) __U, __R);
2317 }
2318
2319 extern __inline __m512
2320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2322 {
2323 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2324 (__v16sf) __B,
2325 (__v16sf)
2326 _mm512_setzero_ps (),
2327 (__mmask16) __U, __R);
2328 }
2329
2330 extern __inline __m512d
2331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2332 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2333 {
2334 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2335 (__v8df) __V,
2336 (__v8df)
2337 _mm512_undefined_pd (),
2338 (__mmask8) -1, __R);
2339 }
2340
2341 extern __inline __m512d
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2344 __m512d __V, const int __R)
2345 {
2346 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2347 (__v8df) __V,
2348 (__v8df) __W,
2349 (__mmask8) __U, __R);
2350 }
2351
2352 extern __inline __m512d
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2355 const int __R)
2356 {
2357 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2358 (__v8df) __V,
2359 (__v8df)
2360 _mm512_setzero_pd (),
2361 (__mmask8) __U, __R);
2362 }
2363
2364 extern __inline __m512
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2367 {
2368 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2369 (__v16sf) __B,
2370 (__v16sf)
2371 _mm512_undefined_ps (),
2372 (__mmask16) -1, __R);
2373 }
2374
2375 extern __inline __m512
2376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2377 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2378 __m512 __B, const int __R)
2379 {
2380 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2381 (__v16sf) __B,
2382 (__v16sf) __W,
2383 (__mmask16) __U, __R);
2384 }
2385
2386 extern __inline __m512
2387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2388 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2389 {
2390 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2391 (__v16sf) __B,
2392 (__v16sf)
2393 _mm512_setzero_ps (),
2394 (__mmask16) __U, __R);
2395 }
2396
2397 extern __inline __m128d
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2400 {
2401 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2402 (__v2df) __B,
2403 __R);
2404 }
2405
2406 extern __inline __m128
2407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2408 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2409 {
2410 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2411 (__v4sf) __B,
2412 __R);
2413 }
2414
2415 extern __inline __m128d
2416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2417 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2418 {
2419 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2420 (__v2df) __B,
2421 __R);
2422 }
2423
2424 extern __inline __m128
2425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2426 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2427 {
2428 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2429 (__v4sf) __B,
2430 __R);
2431 }
2432
2433 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mul_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2436
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))
2439
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2442
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mul_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2445
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))
2448
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2451
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_div_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2454
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))
2457
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_div_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2460
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_div_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2463
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))
2466
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_div_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2469
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_mul_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))
2472
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_mul_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))
2475
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_div_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))
2478
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_div_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
2481 #endif
2482
2483 #ifdef __OPTIMIZE__
2484 extern __inline __m512d
2485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2486 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2487 {
2488 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2489 (__v8df) __B,
2490 (__v8df)
2491 _mm512_undefined_pd (),
2492 (__mmask8) -1, __R);
2493 }
2494
2495 extern __inline __m512d
2496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2498 __m512d __B, const int __R)
2499 {
2500 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2501 (__v8df) __B,
2502 (__v8df) __W,
2503 (__mmask8) __U, __R);
2504 }
2505
2506 extern __inline __m512d
2507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2509 const int __R)
2510 {
2511 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2512 (__v8df) __B,
2513 (__v8df)
2514 _mm512_setzero_pd (),
2515 (__mmask8) __U, __R);
2516 }
2517
2518 extern __inline __m512
2519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2521 {
2522 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2523 (__v16sf) __B,
2524 (__v16sf)
2525 _mm512_undefined_ps (),
2526 (__mmask16) -1, __R);
2527 }
2528
2529 extern __inline __m512
2530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2532 __m512 __B, const int __R)
2533 {
2534 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2535 (__v16sf) __B,
2536 (__v16sf) __W,
2537 (__mmask16) __U, __R);
2538 }
2539
2540 extern __inline __m512
2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2543 {
2544 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2545 (__v16sf) __B,
2546 (__v16sf)
2547 _mm512_setzero_ps (),
2548 (__mmask16) __U, __R);
2549 }
2550
2551 extern __inline __m512d
2552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2554 {
2555 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2556 (__v8df) __B,
2557 (__v8df)
2558 _mm512_undefined_pd (),
2559 (__mmask8) -1, __R);
2560 }
2561
2562 extern __inline __m512d
2563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2565 __m512d __B, const int __R)
2566 {
2567 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2568 (__v8df) __B,
2569 (__v8df) __W,
2570 (__mmask8) __U, __R);
2571 }
2572
2573 extern __inline __m512d
2574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2576 const int __R)
2577 {
2578 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2579 (__v8df) __B,
2580 (__v8df)
2581 _mm512_setzero_pd (),
2582 (__mmask8) __U, __R);
2583 }
2584
2585 extern __inline __m512
2586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2587 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2588 {
2589 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2590 (__v16sf) __B,
2591 (__v16sf)
2592 _mm512_undefined_ps (),
2593 (__mmask16) -1, __R);
2594 }
2595
2596 extern __inline __m512
2597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2598 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2599 __m512 __B, const int __R)
2600 {
2601 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2602 (__v16sf) __B,
2603 (__v16sf) __W,
2604 (__mmask16) __U, __R);
2605 }
2606
2607 extern __inline __m512
2608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2609 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2610 {
2611 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2612 (__v16sf) __B,
2613 (__v16sf)
2614 _mm512_setzero_ps (),
2615 (__mmask16) __U, __R);
2616 }
2617 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_max_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2620
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))
2623
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2626
/* Macro form used when __OPTIMIZE__ is not defined.  The third operand
   comes from _mm512_undefined_ps, the same source the inline-function
   form of this intrinsic uses for its __v16sf operand (this macro used to
   call _mm512_undefined_pd and reinterpret the result).  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_max_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2629
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))
2632
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2635
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_min_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2638
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))
2641
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2644
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2647
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))
2650
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2653 #endif
2654
2655 #ifdef __OPTIMIZE__
2656 extern __inline __m512d
2657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2658 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2659 {
2660 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2661 (__v8df) __B,
2662 (__v8df)
2663 _mm512_undefined_pd (),
2664 (__mmask8) -1, __R);
2665 }
2666
2667 extern __inline __m512d
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2670 __m512d __B, const int __R)
2671 {
2672 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2673 (__v8df) __B,
2674 (__v8df) __W,
2675 (__mmask8) __U, __R);
2676 }
2677
2678 extern __inline __m512d
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2681 const int __R)
2682 {
2683 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2684 (__v8df) __B,
2685 (__v8df)
2686 _mm512_setzero_pd (),
2687 (__mmask8) __U, __R);
2688 }
2689
2690 extern __inline __m512
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2693 {
2694 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2695 (__v16sf) __B,
2696 (__v16sf)
2697 _mm512_undefined_ps (),
2698 (__mmask16) -1, __R);
2699 }
2700
2701 extern __inline __m512
2702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2704 __m512 __B, const int __R)
2705 {
2706 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2707 (__v16sf) __B,
2708 (__v16sf) __W,
2709 (__mmask16) __U, __R);
2710 }
2711
2712 extern __inline __m512
2713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2714 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2715 const int __R)
2716 {
2717 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2718 (__v16sf) __B,
2719 (__v16sf)
2720 _mm512_setzero_ps (),
2721 (__mmask16) __U, __R);
2722 }
2723
2724 extern __inline __m128d
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2727 {
2728 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2729 (__v2df) __B,
2730 __R);
2731 }
2732
2733 extern __inline __m128
2734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2735 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2736 {
2737 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2738 (__v4sf) __B,
2739 __R);
2740 }
2741 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_scalef_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2744
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))
2747
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2750
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_scalef_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2753
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))
2756
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2759
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_scalef_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))
2762
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the leading cast stays attached to the builtin
   call when the macro appears inside a larger expression.  */
#define _mm_scalef_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2765 #endif
2766
2767 #ifdef __OPTIMIZE__
2768 extern __inline __m512d
2769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2770 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2771 {
2772 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2773 (__v8df) __B,
2774 (__v8df) __C,
2775 (__mmask8) -1, __R);
2776 }
2777
2778 extern __inline __m512d
2779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2780 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2781 __m512d __C, const int __R)
2782 {
2783 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2784 (__v8df) __B,
2785 (__v8df) __C,
2786 (__mmask8) __U, __R);
2787 }
2788
2789 extern __inline __m512d
2790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2791 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2792 __mmask8 __U, const int __R)
2793 {
2794 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2795 (__v8df) __B,
2796 (__v8df) __C,
2797 (__mmask8) __U, __R);
2798 }
2799
2800 extern __inline __m512d
2801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2802 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2803 __m512d __C, const int __R)
2804 {
2805 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2806 (__v8df) __B,
2807 (__v8df) __C,
2808 (__mmask8) __U, __R);
2809 }
2810
2811 extern __inline __m512
2812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2813 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2814 {
2815 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2816 (__v16sf) __B,
2817 (__v16sf) __C,
2818 (__mmask16) -1, __R);
2819 }
2820
2821 extern __inline __m512
2822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2823 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2824 __m512 __C, const int __R)
2825 {
2826 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2827 (__v16sf) __B,
2828 (__v16sf) __C,
2829 (__mmask16) __U, __R);
2830 }
2831
2832 extern __inline __m512
2833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2835 __mmask16 __U, const int __R)
2836 {
2837 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2838 (__v16sf) __B,
2839 (__v16sf) __C,
2840 (__mmask16) __U, __R);
2841 }
2842
2843 extern __inline __m512
2844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2846 __m512 __C, const int __R)
2847 {
2848 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2849 (__v16sf) __B,
2850 (__v16sf) __C,
2851 (__mmask16) __U, __R);
2852 }
2853
2854 extern __inline __m512d
2855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2857 {
2858 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2859 (__v8df) __B,
2860 -(__v8df) __C,
2861 (__mmask8) -1, __R);
2862 }
2863
2864 extern __inline __m512d
2865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2866 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2867 __m512d __C, const int __R)
2868 {
2869 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2870 (__v8df) __B,
2871 -(__v8df) __C,
2872 (__mmask8) __U, __R);
2873 }
2874
2875 extern __inline __m512d
2876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2877 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2878 __mmask8 __U, const int __R)
2879 {
2880 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2881 (__v8df) __B,
2882 (__v8df) __C,
2883 (__mmask8) __U, __R);
2884 }
2885
2886 extern __inline __m512d
2887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2888 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2889 __m512d __C, const int __R)
2890 {
2891 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2892 (__v8df) __B,
2893 -(__v8df) __C,
2894 (__mmask8) __U, __R);
2895 }
2896
2897 extern __inline __m512
2898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2899 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2900 {
2901 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2902 (__v16sf) __B,
2903 -(__v16sf) __C,
2904 (__mmask16) -1, __R);
2905 }
2906
2907 extern __inline __m512
2908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2910 __m512 __C, const int __R)
2911 {
2912 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2913 (__v16sf) __B,
2914 -(__v16sf) __C,
2915 (__mmask16) __U, __R);
2916 }
2917
2918 extern __inline __m512
2919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2921 __mmask16 __U, const int __R)
2922 {
2923 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2924 (__v16sf) __B,
2925 (__v16sf) __C,
2926 (__mmask16) __U, __R);
2927 }
2928
2929 extern __inline __m512
2930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2931 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2932 __m512 __C, const int __R)
2933 {
2934 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2935 (__v16sf) __B,
2936 -(__v16sf) __C,
2937 (__mmask16) __U, __R);
2938 }
2939
2940 extern __inline __m512d
2941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2942 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2943 {
2944 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2945 (__v8df) __B,
2946 (__v8df) __C,
2947 (__mmask8) -1, __R);
2948 }
2949
2950 extern __inline __m512d
2951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2952 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2953 __m512d __C, const int __R)
2954 {
2955 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2956 (__v8df) __B,
2957 (__v8df) __C,
2958 (__mmask8) __U, __R);
2959 }
2960
2961 extern __inline __m512d
2962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2963 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2964 __mmask8 __U, const int __R)
2965 {
2966 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2967 (__v8df) __B,
2968 (__v8df) __C,
2969 (__mmask8) __U, __R);
2970 }
2971
2972 extern __inline __m512d
2973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2974 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2975 __m512d __C, const int __R)
2976 {
2977 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2978 (__v8df) __B,
2979 (__v8df) __C,
2980 (__mmask8) __U, __R);
2981 }
2982
2983 extern __inline __m512
2984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2985 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2986 {
2987 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2988 (__v16sf) __B,
2989 (__v16sf) __C,
2990 (__mmask16) -1, __R);
2991 }
2992
2993 extern __inline __m512
2994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2996 __m512 __C, const int __R)
2997 {
2998 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2999 (__v16sf) __B,
3000 (__v16sf) __C,
3001 (__mmask16) __U, __R);
3002 }
3003
3004 extern __inline __m512
3005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3007 __mmask16 __U, const int __R)
3008 {
3009 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3010 (__v16sf) __B,
3011 (__v16sf) __C,
3012 (__mmask16) __U, __R);
3013 }
3014
3015 extern __inline __m512
3016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3018 __m512 __C, const int __R)
3019 {
3020 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __C,
3023 (__mmask16) __U, __R);
3024 }
3025
3026 extern __inline __m512d
3027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3029 {
3030 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3031 (__v8df) __B,
3032 -(__v8df) __C,
3033 (__mmask8) -1, __R);
3034 }
3035
3036 extern __inline __m512d
3037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3038 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3039 __m512d __C, const int __R)
3040 {
3041 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3042 (__v8df) __B,
3043 -(__v8df) __C,
3044 (__mmask8) __U, __R);
3045 }
3046
3047 extern __inline __m512d
3048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3050 __mmask8 __U, const int __R)
3051 {
3052 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3053 (__v8df) __B,
3054 (__v8df) __C,
3055 (__mmask8) __U, __R);
3056 }
3057
3058 extern __inline __m512d
3059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3060 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3061 __m512d __C, const int __R)
3062 {
3063 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3064 (__v8df) __B,
3065 -(__v8df) __C,
3066 (__mmask8) __U, __R);
3067 }
3068
3069 extern __inline __m512
3070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3072 {
3073 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3074 (__v16sf) __B,
3075 -(__v16sf) __C,
3076 (__mmask16) -1, __R);
3077 }
3078
3079 extern __inline __m512
3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3082 __m512 __C, const int __R)
3083 {
3084 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3085 (__v16sf) __B,
3086 -(__v16sf) __C,
3087 (__mmask16) __U, __R);
3088 }
3089
3090 extern __inline __m512
3091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3093 __mmask16 __U, const int __R)
3094 {
3095 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3096 (__v16sf) __B,
3097 (__v16sf) __C,
3098 (__mmask16) __U, __R);
3099 }
3100
3101 extern __inline __m512
3102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3103 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3104 __m512 __C, const int __R)
3105 {
3106 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3107 (__v16sf) __B,
3108 -(__v16sf) __C,
3109 (__mmask16) __U, __R);
3110 }
3111
3112 extern __inline __m512d
3113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3114 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3115 {
3116 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3117 (__v8df) __B,
3118 (__v8df) __C,
3119 (__mmask8) -1, __R);
3120 }
3121
3122 extern __inline __m512d
3123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3124 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3125 __m512d __C, const int __R)
3126 {
3127 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3128 (__v8df) __B,
3129 (__v8df) __C,
3130 (__mmask8) __U, __R);
3131 }
3132
3133 extern __inline __m512d
3134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3136 __mmask8 __U, const int __R)
3137 {
3138 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3139 (__v8df) __B,
3140 (__v8df) __C,
3141 (__mmask8) __U, __R);
3142 }
3143
3144 extern __inline __m512d
3145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3147 __m512d __C, const int __R)
3148 {
3149 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3150 (__v8df) __B,
3151 (__v8df) __C,
3152 (__mmask8) __U, __R);
3153 }
3154
3155 extern __inline __m512
3156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3158 {
3159 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3160 (__v16sf) __B,
3161 (__v16sf) __C,
3162 (__mmask16) -1, __R);
3163 }
3164
3165 extern __inline __m512
3166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3168 __m512 __C, const int __R)
3169 {
3170 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3171 (__v16sf) __B,
3172 (__v16sf) __C,
3173 (__mmask16) __U, __R);
3174 }
3175
3176 extern __inline __m512
3177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3179 __mmask16 __U, const int __R)
3180 {
3181 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3182 (__v16sf) __B,
3183 (__v16sf) __C,
3184 (__mmask16) __U, __R);
3185 }
3186
3187 extern __inline __m512
3188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3189 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3190 __m512 __C, const int __R)
3191 {
3192 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3193 (__v16sf) __B,
3194 (__v16sf) __C,
3195 (__mmask16) __U, __R);
3196 }
3197
3198 extern __inline __m512d
3199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3200 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3201 {
3202 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3203 (__v8df) __B,
3204 -(__v8df) __C,
3205 (__mmask8) -1, __R);
3206 }
3207
3208 extern __inline __m512d
3209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3210 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3211 __m512d __C, const int __R)
3212 {
3213 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3214 (__v8df) __B,
3215 (__v8df) __C,
3216 (__mmask8) __U, __R);
3217 }
3218
3219 extern __inline __m512d
3220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3221 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3222 __mmask8 __U, const int __R)
3223 {
3224 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3225 (__v8df) __B,
3226 (__v8df) __C,
3227 (__mmask8) __U, __R);
3228 }
3229
3230 extern __inline __m512d
3231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3232 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3233 __m512d __C, const int __R)
3234 {
3235 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3236 (__v8df) __B,
3237 -(__v8df) __C,
3238 (__mmask8) __U, __R);
3239 }
3240
3241 extern __inline __m512
3242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3243 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3244 {
3245 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3246 (__v16sf) __B,
3247 -(__v16sf) __C,
3248 (__mmask16) -1, __R);
3249 }
3250
3251 extern __inline __m512
3252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3253 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3254 __m512 __C, const int __R)
3255 {
3256 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3257 (__v16sf) __B,
3258 (__v16sf) __C,
3259 (__mmask16) __U, __R);
3260 }
3261
3262 extern __inline __m512
3263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3265 __mmask16 __U, const int __R)
3266 {
3267 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3268 (__v16sf) __B,
3269 (__v16sf) __C,
3270 (__mmask16) __U, __R);
3271 }
3272
3273 extern __inline __m512
3274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3276 __m512 __C, const int __R)
3277 {
3278 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3279 (__v16sf) __B,
3280 -(__v16sf) __C,
3281 (__mmask16) __U, __R);
3282 }
3283 #else
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on A, B, C with
   a mask operand of -1 and R passed through.  */
#define _mm512_fmadd_round_pd(A, B, C, R)                               \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), -1, (R))
3286
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on A, B, C with
   U as the mask operand and R passed through.  */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)                       \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), (U), (R))
3289
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)                      \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((A), (B), (C), (U), (R))
3292
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)                      \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), (C), (U), (R))
3295
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on A, B, C with
   a mask operand of -1 and R passed through.  */
#define _mm512_fmadd_round_ps(A, B, C, R)                               \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), -1, (R))
3298
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on A, B, C with
   U as the mask operand and R passed through.  */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)                       \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), (C), (U), (R))
3301
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)                      \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((A), (B), (C), (U), (R))
3304
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)                      \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), (C), (U), (R))
3307
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on A, B and the
   negated C, with a mask operand of -1 and R passed through.  */
#define _mm512_fmsub_round_pd(A, B, C, R)                               \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), -1, (R))
3310
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on A, B and the
   negated C, with U as the mask operand and R passed through.  */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)                       \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), -(C), (U), (R))
3313
/* Macro form: expands to __builtin_ia32_vfmsubpd512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)                      \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((A), (B), (C), (U), (R))
3316
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)                      \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), -(C), (U), (R))
3319
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on A, B and the
   negated C, with a mask operand of -1 and R passed through.  */
#define _mm512_fmsub_round_ps(A, B, C, R)                               \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), -1, (R))
3322
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on A, B and the
   negated C, with U as the mask operand and R passed through.  */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)                       \
  (__m512) __builtin_ia32_vfmaddps512_mask ((A), (B), -(C), (U), (R))
3325
/* Macro form: expands to __builtin_ia32_vfmsubps512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)                      \
  (__m512) __builtin_ia32_vfmsubps512_mask3 ((A), (B), (C), (U), (R))
3328
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)                      \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((A), (B), -(C), (U), (R))
3331
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask on A, B, C
   with a mask operand of -1 and R passed through.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R)                            \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), (C), -1, (R))
3334
/* Macro form of _mm512_mask_fmaddsub_round_pd.
   A, B, C are the vector operands, U the mask operand and R is passed
   through as the last builtin argument.  This must expand to the
   vfmaddsub builtin, the same one the inline definition of this
   intrinsic and the _ps macro use; it previously expanded to the plain
   vfmadd builtin, so the macro and inline forms called different
   builtins.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3337
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)                   \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((A), (B), (C), (U), (R))
3340
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)                   \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), (C), (U), (R))
3343
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask on A, B, C
   with a mask operand of -1 and R passed through.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R)                            \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), -1, (R))
3346
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)                    \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), (C), (U), (R))
3349
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)                   \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((A), (B), (C), (U), (R))
3352
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_maskz on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)                   \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), (C), (U), (R))
3355
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and
   the negated C, with a mask operand of -1 and R passed through.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R)                            \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), -1, (R))
3358
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)                    \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((A), (B), -(C), (U), (R))
3361
/* Macro form: expands to __builtin_ia32_vfmsubaddpd512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)                   \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((A), (B), (C), (U), (R))
3364
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)                   \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((A), (B), -(C), (U), (R))
3367
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask on A, B and
   the negated C, with a mask operand of -1 and R passed through.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R)                            \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), -1, (R))
3370
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_mask on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)                    \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((A), (B), -(C), (U), (R))
3373
/* Macro form: expands to __builtin_ia32_vfmsubaddps512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)                   \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((A), (B), (C), (U), (R))
3376
/* Macro form: expands to __builtin_ia32_vfmaddsubps512_maskz on A, B and
   the negated C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)                   \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((A), (B), -(C), (U), (R))
3379
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on the negated
   A, B and C, with a mask operand of -1 and R passed through.  */
#define _mm512_fnmadd_round_pd(A, B, C, R)                              \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), (C), -1, (R))
3382
/* Macro form of _mm512_mask_fnmadd_round_pd.
   A, B, C are the vector operands, U the mask operand and R is passed
   through as the last builtin argument.  A is handed to the vfnmadd
   builtin as-is, exactly as the inline definition of this intrinsic
   does with __A; the macro previously passed -(A) to that same builtin,
   so the two forms of the intrinsic disagreed.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3385
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask3 on the negated
   A, B and C, with U as the mask operand and R passed through.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)                     \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), (B), (C), (U), (R))
3388
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz on the negated
   A, B and C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)                     \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), (C), (U), (R))
3391
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on the negated
   A, B and C, with a mask operand of -1 and R passed through.  */
#define _mm512_fnmadd_round_ps(A, B, C, R)                              \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), (C), -1, (R))
3394
/* Macro form of _mm512_mask_fnmadd_round_ps.
   A, B, C are the vector operands, U the mask operand and R is passed
   through as the last builtin argument.  A is handed to the vfnmadd
   builtin as-is, exactly as the inline definition of this intrinsic
   does with __A; the macro previously passed -(A) to that same builtin,
   so the two forms of the intrinsic disagreed.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3397
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask3 on the negated
   A, B and C, with U as the mask operand and R passed through.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)                     \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), (B), (C), (U), (R))
3400
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz on the negated
   A, B and C, with U as the mask operand and R passed through.  */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)                     \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), (C), (U), (R))
3403
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask on the negated
   A, B and the negated C, with a mask operand of -1 and R passed
   through.  */
#define _mm512_fnmsub_round_pd(A, B, C, R)                              \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), (B), -(C), -1, (R))
3406
/* Macro form: expands to __builtin_ia32_vfnmsubpd512_mask on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)                      \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((A), (B), (C), (U), (R))
3409
/* Macro form: expands to __builtin_ia32_vfnmsubpd512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)                     \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((A), (B), (C), (U), (R))
3412
/* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz on the negated
   A, B and the negated C, with U as the mask operand and R passed
   through.  */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)                     \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), (B), -(C), (U), (R))
3415
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask on the negated
   A, B and the negated C, with a mask operand of -1 and R passed
   through.  */
#define _mm512_fnmsub_round_ps(A, B, C, R)                              \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), (B), -(C), -1, (R))
3418
/* Macro form: expands to __builtin_ia32_vfnmsubps512_mask on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)                      \
  (__m512) __builtin_ia32_vfnmsubps512_mask ((A), (B), (C), (U), (R))
3421
/* Macro form: expands to __builtin_ia32_vfnmsubps512_mask3 on A, B, C
   with U as the mask operand and R passed through.  */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)                     \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((A), (B), (C), (U), (R))
3424
/* Macro form: expands to __builtin_ia32_vfmaddps512_maskz on the negated
   A, B and the negated C, with U as the mask operand and R passed
   through.  */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)                     \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), (B), -(C), (U), (R))
3427 #endif
3428
3429 extern __inline __m512i
3430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3431 _mm512_abs_epi64 (__m512i __A)
3432 {
3433 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3434 (__v8di)
3435 _mm512_undefined_epi32 (),
3436 (__mmask8) -1);
3437 }
3438
3439 extern __inline __m512i
3440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3441 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3442 {
3443 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3444 (__v8di) __W,
3445 (__mmask8) __U);
3446 }
3447
3448 extern __inline __m512i
3449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3450 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3451 {
3452 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3453 (__v8di)
3454 _mm512_setzero_si512 (),
3455 (__mmask8) __U);
3456 }
3457
3458 extern __inline __m512i
3459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460 _mm512_abs_epi32 (__m512i __A)
3461 {
3462 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3463 (__v16si)
3464 _mm512_undefined_epi32 (),
3465 (__mmask16) -1);
3466 }
3467
3468 extern __inline __m512i
3469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3470 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3471 {
3472 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3473 (__v16si) __W,
3474 (__mmask16) __U);
3475 }
3476
3477 extern __inline __m512i
3478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3480 {
3481 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3482 (__v16si)
3483 _mm512_setzero_si512 (),
3484 (__mmask16) __U);
3485 }
3486
3487 extern __inline __m512
3488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3489 _mm512_broadcastss_ps (__m128 __A)
3490 {
3491 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3492 (__v16sf)
3493 _mm512_undefined_ps (),
3494 (__mmask16) -1);
3495 }
3496
3497 extern __inline __m512
3498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3500 {
3501 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3502 (__v16sf) __O, __M);
3503 }
3504
3505 extern __inline __m512
3506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3507 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3508 {
3509 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3510 (__v16sf)
3511 _mm512_setzero_ps (),
3512 __M);
3513 }
3514
3515 extern __inline __m512d
3516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517 _mm512_broadcastsd_pd (__m128d __A)
3518 {
3519 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3520 (__v8df)
3521 _mm512_undefined_pd (),
3522 (__mmask8) -1);
3523 }
3524
3525 extern __inline __m512d
3526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3527 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3528 {
3529 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3530 (__v8df) __O, __M);
3531 }
3532
3533 extern __inline __m512d
3534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3536 {
3537 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3538 (__v8df)
3539 _mm512_setzero_pd (),
3540 __M);
3541 }
3542
3543 extern __inline __m512i
3544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 _mm512_broadcastd_epi32 (__m128i __A)
3546 {
3547 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3548 (__v16si)
3549 _mm512_undefined_epi32 (),
3550 (__mmask16) -1);
3551 }
3552
3553 extern __inline __m512i
3554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3555 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3556 {
3557 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3558 (__v16si) __O, __M);
3559 }
3560
3561 extern __inline __m512i
3562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3563 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3564 {
3565 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3566 (__v16si)
3567 _mm512_setzero_si512 (),
3568 __M);
3569 }
3570
3571 extern __inline __m512i
3572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3573 _mm512_set1_epi32 (int __A)
3574 {
3575 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3576 (__v16si)
3577 _mm512_undefined_epi32 (),
3578 (__mmask16)(-1));
3579 }
3580
3581 extern __inline __m512i
3582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3583 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3584 {
3585 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3586 __M);
3587 }
3588
3589 extern __inline __m512i
3590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3591 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3592 {
3593 return (__m512i)
3594 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3595 (__v16si) _mm512_setzero_si512 (),
3596 __M);
3597 }
3598
3599 extern __inline __m512i
3600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3601 _mm512_broadcastq_epi64 (__m128i __A)
3602 {
3603 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3604 (__v8di)
3605 _mm512_undefined_epi32 (),
3606 (__mmask8) -1);
3607 }
3608
3609 extern __inline __m512i
3610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3612 {
3613 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3614 (__v8di) __O, __M);
3615 }
3616
3617 extern __inline __m512i
3618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3619 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3620 {
3621 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3622 (__v8di)
3623 _mm512_setzero_si512 (),
3624 __M);
3625 }
3626
3627 extern __inline __m512i
3628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3629 _mm512_set1_epi64 (long long __A)
3630 {
3631 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3632 (__v8di)
3633 _mm512_undefined_epi32 (),
3634 (__mmask8)(-1));
3635 }
3636
3637 extern __inline __m512i
3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3640 {
3641 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3642 __M);
3643 }
3644
3645 extern __inline __m512i
3646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3647 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3648 {
3649 return (__m512i)
3650 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3651 (__v8di) _mm512_setzero_si512 (),
3652 __M);
3653 }
3654
3655 extern __inline __m512
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm512_broadcast_f32x4 (__m128 __A)
3658 {
3659 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3660 (__v16sf)
3661 _mm512_undefined_ps (),
3662 (__mmask16) -1);
3663 }
3664
3665 extern __inline __m512
3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3668 {
3669 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3670 (__v16sf) __O,
3671 __M);
3672 }
3673
3674 extern __inline __m512
3675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3677 {
3678 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3679 (__v16sf)
3680 _mm512_setzero_ps (),
3681 __M);
3682 }
3683
3684 extern __inline __m512i
3685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3686 _mm512_broadcast_i32x4 (__m128i __A)
3687 {
3688 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3689 (__v16si)
3690 _mm512_undefined_epi32 (),
3691 (__mmask16) -1);
3692 }
3693
3694 extern __inline __m512i
3695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3696 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3697 {
3698 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3699 (__v16si) __O,
3700 __M);
3701 }
3702
3703 extern __inline __m512i
3704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3705 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3706 {
3707 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3708 (__v16si)
3709 _mm512_setzero_si512 (),
3710 __M);
3711 }
3712
3713 extern __inline __m512d
3714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3715 _mm512_broadcast_f64x4 (__m256d __A)
3716 {
3717 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3718 (__v8df)
3719 _mm512_undefined_pd (),
3720 (__mmask8) -1);
3721 }
3722
3723 extern __inline __m512d
3724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3725 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3726 {
3727 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3728 (__v8df) __O,
3729 __M);
3730 }
3731
3732 extern __inline __m512d
3733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3734 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3735 {
3736 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3737 (__v8df)
3738 _mm512_setzero_pd (),
3739 __M);
3740 }
3741
3742 extern __inline __m512i
3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744 _mm512_broadcast_i64x4 (__m256i __A)
3745 {
3746 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3747 (__v8di)
3748 _mm512_undefined_epi32 (),
3749 (__mmask8) -1);
3750 }
3751
3752 extern __inline __m512i
3753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3754 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3755 {
3756 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3757 (__v8di) __O,
3758 __M);
3759 }
3760
3761 extern __inline __m512i
3762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3763 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3764 {
3765 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3766 (__v8di)
3767 _mm512_setzero_si512 (),
3768 __M);
3769 }
3770
3771 typedef enum
3772 {
3773 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3774 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3775 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3776 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3777 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3778 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3779 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3780 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3781 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3782 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3783 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3784 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3785 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3786 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3787 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3788 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3789 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3790 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3791 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3792 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3793 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3794 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3795 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3796 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3797 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3798 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3799 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3800 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3801 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3802 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3803 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3804 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3805 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3806 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3807 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3808 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3809 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3810 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3811 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3812 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3813 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3814 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3815 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3816 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3817 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3818 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3819 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3820 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3821 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3822 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3823 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3824 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3825 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3826 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3827 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3828 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3829 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3830 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3831 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3832 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3833 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3834 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3835 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3836 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3837 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3838 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3839 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3840 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3841 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3842 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3843 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3844 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3845 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3846 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3847 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3848 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3849 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3850 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3851 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3852 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3853 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3854 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3855 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3856 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3857 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3858 _MM_PERM_DDDD = 0xFF
3859 } _MM_PERM_ENUM;
3860
3861 #ifdef __OPTIMIZE__
3862 extern __inline __m512i
3863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3864 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3865 {
3866 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3867 __mask,
3868 (__v16si)
3869 _mm512_undefined_epi32 (),
3870 (__mmask16) -1);
3871 }
3872
3873 extern __inline __m512i
3874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3875 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3876 _MM_PERM_ENUM __mask)
3877 {
3878 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3879 __mask,
3880 (__v16si) __W,
3881 (__mmask16) __U);
3882 }
3883
3884 extern __inline __m512i
3885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3886 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3887 {
3888 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3889 __mask,
3890 (__v16si)
3891 _mm512_setzero_si512 (),
3892 (__mmask16) __U);
3893 }
3894
3895 extern __inline __m512i
3896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3898 {
3899 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3900 (__v8di) __B, __imm,
3901 (__v8di)
3902 _mm512_undefined_epi32 (),
3903 (__mmask8) -1);
3904 }
3905
3906 extern __inline __m512i
3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3909 __m512i __B, const int __imm)
3910 {
3911 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3912 (__v8di) __B, __imm,
3913 (__v8di) __W,
3914 (__mmask8) __U);
3915 }
3916
3917 extern __inline __m512i
3918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3919 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3920 const int __imm)
3921 {
3922 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3923 (__v8di) __B, __imm,
3924 (__v8di)
3925 _mm512_setzero_si512 (),
3926 (__mmask8) __U);
3927 }
3928
3929 extern __inline __m512i
3930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3932 {
3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934 (__v16si) __B,
3935 __imm,
3936 (__v16si)
3937 _mm512_undefined_epi32 (),
3938 (__mmask16) -1);
3939 }
3940
3941 extern __inline __m512i
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3944 __m512i __B, const int __imm)
3945 {
3946 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3947 (__v16si) __B,
3948 __imm,
3949 (__v16si) __W,
3950 (__mmask16) __U);
3951 }
3952
3953 extern __inline __m512i
3954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3955 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3956 const int __imm)
3957 {
3958 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3959 (__v16si) __B,
3960 __imm,
3961 (__v16si)
3962 _mm512_setzero_si512 (),
3963 (__mmask16) __U);
3964 }
3965
3966 extern __inline __m512d
3967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3969 {
3970 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3971 (__v8df) __B, __imm,
3972 (__v8df)
3973 _mm512_undefined_pd (),
3974 (__mmask8) -1);
3975 }
3976
3977 extern __inline __m512d
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3980 __m512d __B, const int __imm)
3981 {
3982 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3983 (__v8df) __B, __imm,
3984 (__v8df) __W,
3985 (__mmask8) __U);
3986 }
3987
3988 extern __inline __m512d
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3991 const int __imm)
3992 {
3993 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3994 (__v8df) __B, __imm,
3995 (__v8df)
3996 _mm512_setzero_pd (),
3997 (__mmask8) __U);
3998 }
3999
4000 extern __inline __m512
4001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4003 {
4004 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4005 (__v16sf) __B, __imm,
4006 (__v16sf)
4007 _mm512_undefined_ps (),
4008 (__mmask16) -1);
4009 }
4010
4011 extern __inline __m512
4012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4013 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4014 __m512 __B, const int __imm)
4015 {
4016 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4017 (__v16sf) __B, __imm,
4018 (__v16sf) __W,
4019 (__mmask16) __U);
4020 }
4021
4022 extern __inline __m512
4023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4024 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4025 const int __imm)
4026 {
4027 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4028 (__v16sf) __B, __imm,
4029 (__v16sf)
4030 _mm512_setzero_ps (),
4031 (__mmask16) __U);
4032 }
4033
4034 #else
/* Macro form of _mm512_shuffle_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_epi32(V, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
    (__v16si) (__m512i) (V), \
    (int) (IMM), \
    (__v16si) (__m512i) _mm512_undefined_epi32 (), \
    (__mmask16) -1))
4039
/* Macro form of _mm512_mask_shuffle_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_epi32(OLD, M, V, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
    (__v16si) (__m512i) (V), \
    (int) (IMM), \
    (__v16si) (__m512i) (OLD), \
    (__mmask16) (M)))
4044
/* Macro form of _mm512_maskz_shuffle_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_maskz_shuffle_epi32(M, V, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
    (__v16si) (__m512i) (V), \
    (int) (IMM), \
    (__v16si) (__m512i) _mm512_setzero_si512 (), \
    (__mmask16) (M)))
4049
/* Macro form of _mm512_shuffle_i64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i64x2(V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di) (__m512i) (V1), \
    (__v8di) (__m512i) (V2), \
    (int) (IMM), \
    (__v8di) (__m512i) _mm512_undefined_epi32 (), \
    (__mmask8) -1))
4055
/* Macro form of _mm512_mask_shuffle_i64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_i64x2(OLD, M, V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di) (__m512i) (V1), \
    (__v8di) (__m512i) (V2), \
    (int) (IMM), \
    (__v8di) (__m512i) (OLD), \
    (__mmask8) (M)))
4061
/* Macro form of _mm512_maskz_shuffle_i64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_maskz_shuffle_i64x2(M, V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di) (__m512i) (V1), \
    (__v8di) (__m512i) (V2), \
    (int) (IMM), \
    (__v8di) (__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (M)))
4067
/* Macro form of _mm512_shuffle_i32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i32x4(V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si) (__m512i) (V1), \
    (__v16si) (__m512i) (V2), \
    (int) (IMM), \
    (__v16si) (__m512i) _mm512_undefined_epi32 (), \
    (__mmask16) -1))
4073
/* Macro form of _mm512_mask_shuffle_i32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_i32x4(OLD, M, V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si) (__m512i) (V1), \
    (__v16si) (__m512i) (V2), \
    (int) (IMM), \
    (__v16si) (__m512i) (OLD), \
    (__mmask16) (M)))
4079
/* Macro form of _mm512_maskz_shuffle_i32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_maskz_shuffle_i32x4(M, V1, V2, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si) (__m512i) (V1), \
    (__v16si) (__m512i) (V2), \
    (int) (IMM), \
    (__v16si) (__m512i) _mm512_setzero_si512 (), \
    (__mmask16) (M)))
4085
/* Macro form of _mm512_shuffle_f64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f64x2(V1, V2, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df) (__m512d) (V1), \
    (__v8df) (__m512d) (V2), \
    (int) (IMM), \
    (__v8df) (__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1))
4091
/* Macro form of _mm512_mask_shuffle_f64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_f64x2(OLD, M, V1, V2, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df) (__m512d) (V1), \
    (__v8df) (__m512d) (V2), \
    (int) (IMM), \
    (__v8df) (__m512d) (OLD), \
    (__mmask8) (M)))
4097
/* Macro form of _mm512_maskz_shuffle_f64x2, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_maskz_shuffle_f64x2(M, V1, V2, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df) (__m512d) (V1), \
    (__v8df) (__m512d) (V2), \
    (int) (IMM), \
    (__v8df) (__m512d) _mm512_setzero_pd (), \
    (__mmask8) (M)))
4103
/* Macro form of _mm512_shuffle_f32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f32x4(V1, V2, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf) (__m512) (V1), \
    (__v16sf) (__m512) (V2), \
    (int) (IMM), \
    (__v16sf) (__m512) _mm512_undefined_ps (), \
    (__mmask16) -1))
4109
/* Macro form of _mm512_mask_shuffle_f32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_mask_shuffle_f32x4(OLD, M, V1, V2, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf) (__m512) (V1), \
    (__v16sf) (__m512) (V2), \
    (int) (IMM), \
    (__v16sf) (__m512) (OLD), \
    (__mmask16) (M)))
4115
/* Macro form of _mm512_maskz_shuffle_f32x4, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_maskz_shuffle_f32x4(M, V1, V2, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf) (__m512) (V1), \
    (__v16sf) (__m512) (V2), \
    (int) (IMM), \
    (__v16sf) (__m512) _mm512_setzero_ps (), \
    (__mmask16) (M)))
4121 #endif
4122
4123 extern __inline __m512i
4124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4126 {
4127 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4128 (__v16si) __B,
4129 (__v16si)
4130 _mm512_undefined_epi32 (),
4131 (__mmask16) -1);
4132 }
4133
4134 extern __inline __m512i
4135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4136 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4137 {
4138 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4139 (__v16si) __B,
4140 (__v16si) __W,
4141 (__mmask16) __U);
4142 }
4143
4144 extern __inline __m512i
4145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4146 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4147 {
4148 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4149 (__v16si) __B,
4150 (__v16si)
4151 _mm512_setzero_si512 (),
4152 (__mmask16) __U);
4153 }
4154
4155 extern __inline __m512i
4156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4157 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4158 {
4159 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4160 (__v16si) __B,
4161 (__v16si)
4162 _mm512_undefined_epi32 (),
4163 (__mmask16) -1);
4164 }
4165
4166 extern __inline __m512i
4167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4168 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4169 {
4170 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4171 (__v16si) __B,
4172 (__v16si) __W,
4173 (__mmask16) __U);
4174 }
4175
4176 extern __inline __m512i
4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4179 {
4180 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4181 (__v16si) __B,
4182 (__v16si)
4183 _mm512_setzero_si512 (),
4184 (__mmask16) __U);
4185 }
4186
4187 extern __inline __m512i
4188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4190 {
4191 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4192 (__v8di) __B,
4193 (__v8di)
4194 _mm512_undefined_epi32 (),
4195 (__mmask8) -1);
4196 }
4197
4198 extern __inline __m512i
4199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4201 {
4202 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4203 (__v8di) __B,
4204 (__v8di) __W,
4205 (__mmask8) __U);
4206 }
4207
4208 extern __inline __m512i
4209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4211 {
4212 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4213 (__v8di) __B,
4214 (__v8di)
4215 _mm512_setzero_si512 (),
4216 (__mmask8) __U);
4217 }
4218
4219 extern __inline __m512i
4220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4221 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4222 {
4223 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4224 (__v8di) __B,
4225 (__v8di)
4226 _mm512_undefined_epi32 (),
4227 (__mmask8) -1);
4228 }
4229
4230 extern __inline __m512i
4231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4232 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4233 {
4234 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4235 (__v8di) __B,
4236 (__v8di) __W,
4237 (__mmask8) __U);
4238 }
4239
4240 extern __inline __m512i
4241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4243 {
4244 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4245 (__v8di) __B,
4246 (__v8di)
4247 _mm512_setzero_si512 (),
4248 (__mmask8) __U);
4249 }
4250
4251 #ifdef __OPTIMIZE__
4252 extern __inline __m256i
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4255 {
4256 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4257 (__v8si)
4258 _mm256_undefined_si256 (),
4259 (__mmask8) -1, __R);
4260 }
4261
4262 extern __inline __m256i
4263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4264 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4265 const int __R)
4266 {
4267 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4268 (__v8si) __W,
4269 (__mmask8) __U, __R);
4270 }
4271
4272 extern __inline __m256i
4273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4274 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4275 {
4276 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4277 (__v8si)
4278 _mm256_setzero_si256 (),
4279 (__mmask8) __U, __R);
4280 }
4281
4282 extern __inline __m256i
4283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4284 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4285 {
4286 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4287 (__v8si)
4288 _mm256_undefined_si256 (),
4289 (__mmask8) -1, __R);
4290 }
4291
4292 extern __inline __m256i
4293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4295 const int __R)
4296 {
4297 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4298 (__v8si) __W,
4299 (__mmask8) __U, __R);
4300 }
4301
4302 extern __inline __m256i
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4305 {
4306 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4307 (__v8si)
4308 _mm256_setzero_si256 (),
4309 (__mmask8) __U, __R);
4310 }
4311 #else
/* Macro form of _mm512_cvtt_roundpd_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvtt_roundpd_epi32(V, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
    V, (__v8si) _mm256_undefined_si256 (), -1, R))
4314
/* Macro form of _mm512_mask_cvtt_roundpd_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvtt_roundpd_epi32(OLD, M, V, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
    V, (__v8si) (OLD), M, R))
4317
/* Macro form of _mm512_maskz_cvtt_roundpd_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvtt_roundpd_epi32(M, V, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
    V, (__v8si) _mm256_setzero_si256 (), M, R))
4320
/* Macro form of _mm512_cvtt_roundpd_epu32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvtt_roundpd_epu32(V, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
    V, (__v8si) _mm256_undefined_si256 (), -1, R))
4323
/* Macro form of _mm512_mask_cvtt_roundpd_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvtt_roundpd_epu32(OLD, M, V, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
    V, (__v8si) (OLD), M, R))
4326
/* Macro form of _mm512_maskz_cvtt_roundpd_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvtt_roundpd_epu32(M, V, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
    V, (__v8si) _mm256_setzero_si256 (), M, R))
4329 #endif
4330
4331 #ifdef __OPTIMIZE__
4332 extern __inline __m256i
4333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4334 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4335 {
4336 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4337 (__v8si)
4338 _mm256_undefined_si256 (),
4339 (__mmask8) -1, __R);
4340 }
4341
4342 extern __inline __m256i
4343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4345 const int __R)
4346 {
4347 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4348 (__v8si) __W,
4349 (__mmask8) __U, __R);
4350 }
4351
4352 extern __inline __m256i
4353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4354 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4355 {
4356 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4357 (__v8si)
4358 _mm256_setzero_si256 (),
4359 (__mmask8) __U, __R);
4360 }
4361
4362 extern __inline __m256i
4363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4365 {
4366 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4367 (__v8si)
4368 _mm256_undefined_si256 (),
4369 (__mmask8) -1, __R);
4370 }
4371
4372 extern __inline __m256i
4373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4375 const int __R)
4376 {
4377 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4378 (__v8si) __W,
4379 (__mmask8) __U, __R);
4380 }
4381
4382 extern __inline __m256i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4385 {
4386 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4387 (__v8si)
4388 _mm256_setzero_si256 (),
4389 (__mmask8) __U, __R);
4390 }
4391 #else
/* Macro form of _mm512_cvt_roundpd_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvt_roundpd_epi32(V, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
    V, (__v8si) _mm256_undefined_si256 (), -1, R))
4394
/* Macro form of _mm512_mask_cvt_roundpd_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvt_roundpd_epi32(OLD, M, V, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
    V, (__v8si) (OLD), M, R))
4397
/* Macro form of _mm512_maskz_cvt_roundpd_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvt_roundpd_epi32(M, V, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
    V, (__v8si) _mm256_setzero_si256 (), M, R))
4400
/* Macro form of _mm512_cvt_roundpd_epu32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvt_roundpd_epu32(V, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
    V, (__v8si) _mm256_undefined_si256 (), -1, R))
4403
/* Macro form of _mm512_mask_cvt_roundpd_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvt_roundpd_epu32(OLD, M, V, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
    V, (__v8si) (OLD), M, R))
4406
/* Macro form of _mm512_maskz_cvt_roundpd_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvt_roundpd_epu32(M, V, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
    V, (__v8si) _mm256_setzero_si256 (), M, R))
4409 #endif
4410
4411 #ifdef __OPTIMIZE__
4412 extern __inline __m512i
4413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4415 {
4416 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4417 (__v16si)
4418 _mm512_undefined_epi32 (),
4419 (__mmask16) -1, __R);
4420 }
4421
4422 extern __inline __m512i
4423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4425 const int __R)
4426 {
4427 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4428 (__v16si) __W,
4429 (__mmask16) __U, __R);
4430 }
4431
4432 extern __inline __m512i
4433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4434 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4435 {
4436 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4437 (__v16si)
4438 _mm512_setzero_si512 (),
4439 (__mmask16) __U, __R);
4440 }
4441
4442 extern __inline __m512i
4443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4444 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4445 {
4446 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4447 (__v16si)
4448 _mm512_undefined_epi32 (),
4449 (__mmask16) -1, __R);
4450 }
4451
4452 extern __inline __m512i
4453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4455 const int __R)
4456 {
4457 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4458 (__v16si) __W,
4459 (__mmask16) __U, __R);
4460 }
4461
4462 extern __inline __m512i
4463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4464 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4465 {
4466 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4467 (__v16si)
4468 _mm512_setzero_si512 (),
4469 (__mmask16) __U, __R);
4470 }
4471 #else
/* Macro form of _mm512_cvtt_roundps_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvtt_roundps_epi32(V, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
    V, (__v16si) _mm512_undefined_epi32 (), -1, R))
4474
/* Macro form of _mm512_mask_cvtt_roundps_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvtt_roundps_epi32(OLD, M, V, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
    V, (__v16si) (OLD), M, R))
4477
/* Macro form of _mm512_maskz_cvtt_roundps_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvtt_roundps_epi32(M, V, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
    V, (__v16si) _mm512_setzero_si512 (), M, R))
4480
/* Macro form of _mm512_cvtt_roundps_epu32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvtt_roundps_epu32(V, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
    V, (__v16si) _mm512_undefined_epi32 (), -1, R))
4483
/* Macro form of _mm512_mask_cvtt_roundps_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvtt_roundps_epu32(OLD, M, V, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
    V, (__v16si) (OLD), M, R))
4486
/* Macro form of _mm512_maskz_cvtt_roundps_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvtt_roundps_epu32(M, V, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
    V, (__v16si) _mm512_setzero_si512 (), M, R))
4489 #endif
4490
4491 #ifdef __OPTIMIZE__
4492 extern __inline __m512i
4493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4494 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4495 {
4496 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4497 (__v16si)
4498 _mm512_undefined_epi32 (),
4499 (__mmask16) -1, __R);
4500 }
4501
4502 extern __inline __m512i
4503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4504 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4505 const int __R)
4506 {
4507 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4508 (__v16si) __W,
4509 (__mmask16) __U, __R);
4510 }
4511
4512 extern __inline __m512i
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4515 {
4516 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4517 (__v16si)
4518 _mm512_setzero_si512 (),
4519 (__mmask16) __U, __R);
4520 }
4521
4522 extern __inline __m512i
4523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4525 {
4526 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4527 (__v16si)
4528 _mm512_undefined_epi32 (),
4529 (__mmask16) -1, __R);
4530 }
4531
4532 extern __inline __m512i
4533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4534 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4535 const int __R)
4536 {
4537 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4538 (__v16si) __W,
4539 (__mmask16) __U, __R);
4540 }
4541
4542 extern __inline __m512i
4543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4544 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4545 {
4546 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4547 (__v16si)
4548 _mm512_setzero_si512 (),
4549 (__mmask16) __U, __R);
4550 }
4551 #else
/* Macro form of _mm512_cvt_roundps_epi32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvt_roundps_epi32(V, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
    V, (__v16si) _mm512_undefined_epi32 (), -1, R))
4554
/* Macro form of _mm512_mask_cvt_roundps_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvt_roundps_epi32(OLD, M, V, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
    V, (__v16si) (OLD), M, R))
4557
/* Macro form of _mm512_maskz_cvt_roundps_epi32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvt_roundps_epi32(M, V, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
    V, (__v16si) _mm512_setzero_si512 (), M, R))
4560
/* Macro form of _mm512_cvt_roundps_epu32, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_cvt_roundps_epu32(V, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
    V, (__v16si) _mm512_undefined_epi32 (), -1, R))
4563
/* Macro form of _mm512_mask_cvt_roundps_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_cvt_roundps_epu32(OLD, M, V, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
    V, (__v16si) (OLD), M, R))
4566
/* Macro form of _mm512_maskz_cvt_roundps_epu32, used when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_cvt_roundps_epu32(M, V, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
    V, (__v16si) _mm512_setzero_si512 (), M, R))
4569 #endif
4570
4571 extern __inline __m128d
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4574 {
4575 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4576 }
4577
4578 #ifdef __x86_64__
4579 #ifdef __OPTIMIZE__
4580 extern __inline __m128d
4581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4582 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4583 {
4584 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4585 }
4586
4587 extern __inline __m128d
4588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4589 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4590 {
4591 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4592 }
4593
4594 extern __inline __m128d
4595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4596 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4597 {
4598 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4599 }
4600 #else
/* Macro form of _mm_cvt_roundu64_sd, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 (A, B, C))
4603
/* Macro form of _mm_cvt_roundi64_sd, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))
4606
/* Macro form of _mm_cvt_roundsi64_sd, used when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that a postfix operator
   written after the macro call (e.g. a subscript) applies to the cast
   result, as it does with the inline-function form, rather than to the
   builtin call alone.  */
#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))
4609 #endif
4610
4611 #endif
4612
4613 #ifdef __OPTIMIZE__
4614 extern __inline __m128
4615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4616 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4617 {
4618 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4619 }
4620
4621 extern __inline __m128
4622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4624 {
4625 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4626 }
4627
4628 extern __inline __m128
4629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4631 {
4632 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4633 }
4634 #else
/* Macro form of _mm_cvt_roundu32_ss, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 (A, B, C))
4637
/* Macro form of _mm_cvt_roundi32_ss, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))
4640
/* Macro form of _mm_cvt_roundsi32_ss, used when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that a postfix operator
   written after the macro call (e.g. a subscript) applies to the cast
   result, as it does with the inline-function form, rather than to the
   builtin call alone.  */
#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))
4643 #endif
4644
4645 #ifdef __x86_64__
4646 #ifdef __OPTIMIZE__
4647 extern __inline __m128
4648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4649 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4650 {
4651 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4652 }
4653
4654 extern __inline __m128
4655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4656 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4657 {
4658 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4659 }
4660
4661 extern __inline __m128
4662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4663 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4664 {
4665 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4666 }
4667 #else
/* Macro form of _mm_cvt_roundu64_ss, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss64 (A, B, C))
4670
/* Macro form of _mm_cvt_roundi64_ss, used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (e.g. a subscript) applies to the cast result, as it
   does with the inline-function form, rather than to the builtin call
   alone.  */
#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))
4673
/* Macro form of _mm_cvt_roundsi64_ss, used when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that a postfix operator
   written after the macro call (e.g. a subscript) applies to the cast
   result, as it does with the inline-function form, rather than to the
   builtin call alone.  */
#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))
4676 #endif
4677
4678 #endif
4679
4680 extern __inline __m128i
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 _mm512_cvtepi32_epi8 (__m512i __A)
4683 {
4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685 (__v16qi)
4686 _mm_undefined_si128 (),
4687 (__mmask16) -1);
4688 }
4689
4690 extern __inline void
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4693 {
4694 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4695 }
4696
4697 extern __inline __m128i
4698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4699 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4700 {
4701 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4702 (__v16qi) __O, __M);
4703 }
4704
4705 extern __inline __m128i
4706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4707 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4708 {
4709 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4710 (__v16qi)
4711 _mm_setzero_si128 (),
4712 __M);
4713 }
4714
4715 extern __inline __m128i
4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717 _mm512_cvtsepi32_epi8 (__m512i __A)
4718 {
4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_undefined_si128 (),
4722 (__mmask16) -1);
4723 }
4724
4725 extern __inline void
4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4728 {
4729 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4730 }
4731
4732 extern __inline __m128i
4733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4734 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4735 {
4736 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4737 (__v16qi) __O, __M);
4738 }
4739
4740 extern __inline __m128i
4741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4743 {
4744 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4745 (__v16qi)
4746 _mm_setzero_si128 (),
4747 __M);
4748 }
4749
4750 extern __inline __m128i
4751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752 _mm512_cvtusepi32_epi8 (__m512i __A)
4753 {
4754 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4755 (__v16qi)
4756 _mm_undefined_si128 (),
4757 (__mmask16) -1);
4758 }
4759
4760 extern __inline void
4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4763 {
4764 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4765 }
4766
4767 extern __inline __m128i
4768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4769 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4770 {
4771 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4772 (__v16qi) __O,
4773 __M);
4774 }
4775
4776 extern __inline __m128i
4777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4779 {
4780 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4781 (__v16qi)
4782 _mm_setzero_si128 (),
4783 __M);
4784 }
4785
4786 extern __inline __m256i
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm512_cvtepi32_epi16 (__m512i __A)
4789 {
4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791 (__v16hi)
4792 _mm256_undefined_si256 (),
4793 (__mmask16) -1);
4794 }
4795
4796 extern __inline void
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4799 {
4800 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4801 }
4802
4803 extern __inline __m256i
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4806 {
4807 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4808 (__v16hi) __O, __M);
4809 }
4810
4811 extern __inline __m256i
4812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4813 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4814 {
4815 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4816 (__v16hi)
4817 _mm256_setzero_si256 (),
4818 __M);
4819 }
4820
4821 extern __inline __m256i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_cvtsepi32_epi16 (__m512i __A)
4824 {
4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_undefined_si256 (),
4828 (__mmask16) -1);
4829 }
4830
4831 extern __inline void
4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4834 {
4835 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4836 }
4837
4838 extern __inline __m256i
4839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4840 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4841 {
4842 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4843 (__v16hi) __O, __M);
4844 }
4845
4846 extern __inline __m256i
4847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4849 {
4850 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4851 (__v16hi)
4852 _mm256_setzero_si256 (),
4853 __M);
4854 }
4855
4856 extern __inline __m256i
4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858 _mm512_cvtusepi32_epi16 (__m512i __A)
4859 {
4860 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4861 (__v16hi)
4862 _mm256_undefined_si256 (),
4863 (__mmask16) -1);
4864 }
4865
4866 extern __inline void
4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4869 {
4870 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4871 }
4872
4873 extern __inline __m256i
4874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4875 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4876 {
4877 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4878 (__v16hi) __O,
4879 __M);
4880 }
4881
4882 extern __inline __m256i
4883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4884 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4885 {
4886 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4887 (__v16hi)
4888 _mm256_setzero_si256 (),
4889 __M);
4890 }
4891
4892 extern __inline __m256i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_cvtepi64_epi32 (__m512i __A)
4895 {
4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897 (__v8si)
4898 _mm256_undefined_si256 (),
4899 (__mmask8) -1);
4900 }
4901
4902 extern __inline void
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4905 {
4906 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4907 }
4908
4909 extern __inline __m256i
4910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4911 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4912 {
4913 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4914 (__v8si) __O, __M);
4915 }
4916
4917 extern __inline __m256i
4918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4919 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4920 {
4921 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4922 (__v8si)
4923 _mm256_setzero_si256 (),
4924 __M);
4925 }
4926
4927 extern __inline __m256i
4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4929 _mm512_cvtsepi64_epi32 (__m512i __A)
4930 {
4931 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4932 (__v8si)
4933 _mm256_undefined_si256 (),
4934 (__mmask8) -1);
4935 }
4936
4937 extern __inline void
4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4940 {
4941 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4942 }
4943
4944 extern __inline __m256i
4945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4946 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4947 {
4948 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4949 (__v8si) __O, __M);
4950 }
4951
4952 extern __inline __m256i
4953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4955 {
4956 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4957 (__v8si)
4958 _mm256_setzero_si256 (),
4959 __M);
4960 }
4961
4962 extern __inline __m256i
4963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964 _mm512_cvtusepi64_epi32 (__m512i __A)
4965 {
4966 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4967 (__v8si)
4968 _mm256_undefined_si256 (),
4969 (__mmask8) -1);
4970 }
4971
4972 extern __inline void
4973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4974 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4975 {
4976 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4977 }
4978
4979 extern __inline __m256i
4980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4981 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4982 {
4983 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4984 (__v8si) __O, __M);
4985 }
4986
4987 extern __inline __m256i
4988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4990 {
4991 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4992 (__v8si)
4993 _mm256_setzero_si256 (),
4994 __M);
4995 }
4996
4997 extern __inline __m128i
4998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999 _mm512_cvtepi64_epi16 (__m512i __A)
5000 {
5001 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5002 (__v8hi)
5003 _mm_undefined_si128 (),
5004 (__mmask8) -1);
5005 }
5006
5007 extern __inline void
5008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5010 {
5011 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5012 }
5013
5014 extern __inline __m128i
5015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5016 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5017 {
5018 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5019 (__v8hi) __O, __M);
5020 }
5021
5022 extern __inline __m128i
5023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5024 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5025 {
5026 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5027 (__v8hi)
5028 _mm_setzero_si128 (),
5029 __M);
5030 }
5031
5032 extern __inline __m128i
5033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034 _mm512_cvtsepi64_epi16 (__m512i __A)
5035 {
5036 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5037 (__v8hi)
5038 _mm_undefined_si128 (),
5039 (__mmask8) -1);
5040 }
5041
5042 extern __inline void
5043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5045 {
5046 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5047 }
5048
5049 extern __inline __m128i
5050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5051 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5052 {
5053 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5054 (__v8hi) __O, __M);
5055 }
5056
5057 extern __inline __m128i
5058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5060 {
5061 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5062 (__v8hi)
5063 _mm_setzero_si128 (),
5064 __M);
5065 }
5066
5067 extern __inline __m128i
5068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069 _mm512_cvtusepi64_epi16 (__m512i __A)
5070 {
5071 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5072 (__v8hi)
5073 _mm_undefined_si128 (),
5074 (__mmask8) -1);
5075 }
5076
5077 extern __inline void
5078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5079 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5080 {
5081 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5082 }
5083
5084 extern __inline __m128i
5085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5087 {
5088 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5089 (__v8hi) __O, __M);
5090 }
5091
5092 extern __inline __m128i
5093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5095 {
5096 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5097 (__v8hi)
5098 _mm_setzero_si128 (),
5099 __M);
5100 }
5101
5102 extern __inline __m128i
5103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5104 _mm512_cvtepi64_epi8 (__m512i __A)
5105 {
5106 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5107 (__v16qi)
5108 _mm_undefined_si128 (),
5109 (__mmask8) -1);
5110 }
5111
5112 extern __inline void
5113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5115 {
5116 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5117 }
5118
5119 extern __inline __m128i
5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5122 {
5123 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5124 (__v16qi) __O, __M);
5125 }
5126
5127 extern __inline __m128i
5128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5129 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5130 {
5131 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5132 (__v16qi)
5133 _mm_setzero_si128 (),
5134 __M);
5135 }
5136
5137 extern __inline __m128i
5138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139 _mm512_cvtsepi64_epi8 (__m512i __A)
5140 {
5141 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5142 (__v16qi)
5143 _mm_undefined_si128 (),
5144 (__mmask8) -1);
5145 }
5146
5147 extern __inline void
5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5150 {
5151 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5152 }
5153
5154 extern __inline __m128i
5155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5157 {
5158 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5159 (__v16qi) __O, __M);
5160 }
5161
5162 extern __inline __m128i
5163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5164 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5165 {
5166 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5167 (__v16qi)
5168 _mm_setzero_si128 (),
5169 __M);
5170 }
5171
5172 extern __inline __m128i
5173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5174 _mm512_cvtusepi64_epi8 (__m512i __A)
5175 {
5176 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5177 (__v16qi)
5178 _mm_undefined_si128 (),
5179 (__mmask8) -1);
5180 }
5181
5182 extern __inline void
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5185 {
5186 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5187 }
5188
5189 extern __inline __m128i
5190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5191 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5192 {
5193 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5194 (__v16qi) __O,
5195 __M);
5196 }
5197
5198 extern __inline __m128i
5199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5200 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5201 {
5202 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5203 (__v16qi)
5204 _mm_setzero_si128 (),
5205 __M);
5206 }
5207
5208 extern __inline __m512d
5209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5210 _mm512_cvtepi32_pd (__m256i __A)
5211 {
5212 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5213 (__v8df)
5214 _mm512_undefined_pd (),
5215 (__mmask8) -1);
5216 }
5217
5218 extern __inline __m512d
5219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5220 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5221 {
5222 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5223 (__v8df) __W,
5224 (__mmask8) __U);
5225 }
5226
5227 extern __inline __m512d
5228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5229 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5230 {
5231 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5232 (__v8df)
5233 _mm512_setzero_pd (),
5234 (__mmask8) __U);
5235 }
5236
5237 extern __inline __m512d
5238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239 _mm512_cvtepu32_pd (__m256i __A)
5240 {
5241 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5242 (__v8df)
5243 _mm512_undefined_pd (),
5244 (__mmask8) -1);
5245 }
5246
5247 extern __inline __m512d
5248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5249 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5250 {
5251 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5252 (__v8df) __W,
5253 (__mmask8) __U);
5254 }
5255
5256 extern __inline __m512d
5257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5258 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5259 {
5260 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5261 (__v8df)
5262 _mm512_setzero_pd (),
5263 (__mmask8) __U);
5264 }
5265
5266 #ifdef __OPTIMIZE__
5267 extern __inline __m512
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5270 {
5271 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5272 (__v16sf)
5273 _mm512_undefined_ps (),
5274 (__mmask16) -1, __R);
5275 }
5276
5277 extern __inline __m512
5278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5279 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5280 const int __R)
5281 {
5282 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5283 (__v16sf) __W,
5284 (__mmask16) __U, __R);
5285 }
5286
5287 extern __inline __m512
5288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5289 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5290 {
5291 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5292 (__v16sf)
5293 _mm512_setzero_ps (),
5294 (__mmask16) __U, __R);
5295 }
5296
5297 extern __inline __m512
5298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5299 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5300 {
5301 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5302 (__v16sf)
5303 _mm512_undefined_ps (),
5304 (__mmask16) -1, __R);
5305 }
5306
5307 extern __inline __m512
5308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5309 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5310 const int __R)
5311 {
5312 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5313 (__v16sf) __W,
5314 (__mmask16) __U, __R);
5315 }
5316
5317 extern __inline __m512
5318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5319 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5320 {
5321 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5322 (__v16sf)
5323 _mm512_setzero_ps (),
5324 (__mmask16) __U, __R);
5325 }
5326
5327 #else
/* Macro forms of the cvtdq2ps512 wrappers for builds without
   __OPTIMIZE__.  Arguments are parenthesized and cast the same way the
   inline versions cast them, and each expansion is wrapped in
   parentheses so it behaves as a single primary expression.  */
#define _mm512_cvt_roundepi32_ps(A, B)					\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)			\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf)(__m512) (W), (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)				\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))
5336
/* Macro forms of the cvtudq2ps512 wrappers for builds without
   __OPTIMIZE__.  Arguments are parenthesized and cast the same way the
   inline versions cast them, and each expansion is wrapped in
   parentheses so it behaves as a single primary expression.  */
#define _mm512_cvt_roundepu32_ps(A, B)					\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)			\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf)(__m512) (W), (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)				\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A),	\
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))
5345 #endif
5346
5347 #ifdef __OPTIMIZE__
5348 extern __inline __m256d
5349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5350 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5351 {
5352 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5353 __imm,
5354 (__v4df)
5355 _mm256_undefined_pd (),
5356 (__mmask8) -1);
5357 }
5358
5359 extern __inline __m256d
5360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5362 const int __imm)
5363 {
5364 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5365 __imm,
5366 (__v4df) __W,
5367 (__mmask8) __U);
5368 }
5369
5370 extern __inline __m256d
5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5373 {
5374 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5375 __imm,
5376 (__v4df)
5377 _mm256_setzero_pd (),
5378 (__mmask8) __U);
5379 }
5380
5381 extern __inline __m128
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5384 {
5385 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5386 __imm,
5387 (__v4sf)
5388 _mm_undefined_ps (),
5389 (__mmask8) -1);
5390 }
5391
5392 extern __inline __m128
5393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5395 const int __imm)
5396 {
5397 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5398 __imm,
5399 (__v4sf) __W,
5400 (__mmask8) __U);
5401 }
5402
5403 extern __inline __m128
5404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5405 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5406 {
5407 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5408 __imm,
5409 (__v4sf)
5410 _mm_setzero_ps (),
5411 (__mmask8) __U);
5412 }
5413
5414 extern __inline __m256i
5415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5417 {
5418 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5419 __imm,
5420 (__v4di)
5421 _mm256_undefined_si256 (),
5422 (__mmask8) -1);
5423 }
5424
5425 extern __inline __m256i
5426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5427 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5428 const int __imm)
5429 {
5430 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5431 __imm,
5432 (__v4di) __W,
5433 (__mmask8) __U);
5434 }
5435
5436 extern __inline __m256i
5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5439 {
5440 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5441 __imm,
5442 (__v4di)
5443 _mm256_setzero_si256 (),
5444 (__mmask8) __U);
5445 }
5446
5447 extern __inline __m128i
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5450 {
5451 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5452 __imm,
5453 (__v4si)
5454 _mm_undefined_si128 (),
5455 (__mmask8) -1);
5456 }
5457
5458 extern __inline __m128i
5459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5461 const int __imm)
5462 {
5463 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5464 __imm,
5465 (__v4si) __W,
5466 (__mmask8) __U);
5467 }
5468
5469 extern __inline __m128i
5470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5471 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5472 {
5473 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5474 __imm,
5475 (__v4si)
5476 _mm_setzero_si128 (),
5477 (__mmask8) __U);
5478 }
5479 #else
5480
/* Macro forms of the extractf64x4 wrappers, selected when __OPTIMIZE__
   is not defined.  Operand order matches the inline versions: source,
   selector, pass-through vector, mask.  */
#define _mm512_extractf64x4_pd(X, C)					\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d) _mm256_undefined_pd (), (__mmask8) -1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
5498
/* Macro forms of the extractf32x4 wrappers, selected when __OPTIMIZE__
   is not defined.  Operand order matches the inline versions: source,
   selector, pass-through vector, mask.  */
#define _mm512_extractf32x4_ps(X, C)					\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128) _mm_undefined_ps (), (__mmask8) -1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
5516
/* Macro forms of the extracti64x4 wrappers, selected when __OPTIMIZE__
   is not defined.  Operand order matches the inline versions: source,
   selector, pass-through vector, mask.  */
#define _mm512_extracti64x4_epi64(X, C)					\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i) _mm256_undefined_si256 (), (__mmask8) -1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
5534
/* Macro forms of the extracti32x4 wrappers, selected when __OPTIMIZE__
   is not defined.  Operand order matches the inline versions: source,
   selector, pass-through vector, mask.  */
#define _mm512_extracti32x4_epi32(X, C)					\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i) _mm_undefined_si128 (), (__mmask8) -1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
5552 #endif
5553
5554 #ifdef __OPTIMIZE__
5555 extern __inline __m512i
5556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5557 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5558 {
5559 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5560 (__v4si) __B,
5561 __imm,
5562 (__v16si) __A, -1);
5563 }
5564
5565 extern __inline __m512
5566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5567 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5568 {
5569 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5570 (__v4sf) __B,
5571 __imm,
5572 (__v16sf) __A, -1);
5573 }
5574
5575 extern __inline __m512i
5576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5578 {
5579 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5580 (__v4di) __B,
5581 __imm,
5582 (__v8di)
5583 _mm512_undefined_epi32 (),
5584 (__mmask8) -1);
5585 }
5586
5587 extern __inline __m512i
5588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5589 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5590 __m256i __B, const int __imm)
5591 {
5592 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5593 (__v4di) __B,
5594 __imm,
5595 (__v8di) __W,
5596 (__mmask8) __U);
5597 }
5598
5599 extern __inline __m512i
5600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5602 const int __imm)
5603 {
5604 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5605 (__v4di) __B,
5606 __imm,
5607 (__v8di)
5608 _mm512_setzero_si512 (),
5609 (__mmask8) __U);
5610 }
5611
5612 extern __inline __m512d
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5615 {
5616 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5617 (__v4df) __B,
5618 __imm,
5619 (__v8df)
5620 _mm512_undefined_pd (),
5621 (__mmask8) -1);
5622 }
5623
5624 extern __inline __m512d
5625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5626 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5627 __m256d __B, const int __imm)
5628 {
5629 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5630 (__v4df) __B,
5631 __imm,
5632 (__v8df) __W,
5633 (__mmask8) __U);
5634 }
5635
5636 extern __inline __m512d
5637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5638 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5639 const int __imm)
5640 {
5641 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5642 (__v4df) __B,
5643 __imm,
5644 (__v8df)
5645 _mm512_setzero_pd (),
5646 (__mmask8) __U);
5647 }
5648 #else
/* Macro forms of the 32x4 inserts, selected when __OPTIMIZE__ is not
   defined.  X appears twice in each expansion (first operand and
   pass-through vector), so it is evaluated twice.  */
#define _mm512_insertf32x4(X, Y, C)					\
  ((__m512) __builtin_ia32_insertf32x4_mask (				\
     (__v16sf)(__m512) (X), (__v4sf)(__m128) (Y), (int) (C),		\
     (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C)					\
  ((__m512i) __builtin_ia32_inserti32x4_mask (				\
     (__v16si)(__m512i) (X), (__v4si)(__m128i) (Y), (int) (C),		\
     (__v16si)(__m512i) (X), (__mmask16)(-1)))
5656
/* Macro forms of the insertf64x4 wrappers, selected when __OPTIMIZE__
   is not defined.  Operand order matches the inline versions: base,
   inserted part, selector, pass-through vector, mask.  */
#define _mm512_insertf64x4(X, Y, C)					\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d) _mm512_undefined_pd (), (__mmask8) -1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d) (W), (__mmask8) (U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
5674
/* Macro form: __builtin_ia32_inserti64x4_mask on (X, Y, C) with an
   undefined vector as the fourth operand and an all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))
5680
/* Macro form: __builtin_ia32_inserti64x4_mask on (X, Y, C) with W as
   the fourth operand and U as the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
5686
/* Macro form: __builtin_ia32_inserti64x4_mask on (X, Y, C) with a zero
   vector as the fourth operand and U as the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
5692 #endif
5693
5694 extern __inline __m512d
5695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 _mm512_loadu_pd (void const *__P)
5697 {
5698 return *(__m512d_u *)__P;
5699 }
5700
5701 extern __inline __m512d
5702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5703 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5704 {
5705 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5706 (__v8df) __W,
5707 (__mmask8) __U);
5708 }
5709
5710 extern __inline __m512d
5711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5713 {
5714 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
5715 (__v8df)
5716 _mm512_setzero_pd (),
5717 (__mmask8) __U);
5718 }
5719
5720 extern __inline void
5721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722 _mm512_storeu_pd (void *__P, __m512d __A)
5723 {
5724 *(__m512d_u *)__P = __A;
5725 }
5726
5727 extern __inline void
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5730 {
5731 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
5732 (__mmask8) __U);
5733 }
5734
5735 extern __inline __m512
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_loadu_ps (void const *__P)
5738 {
5739 return *(__m512_u *)__P;
5740 }
5741
5742 extern __inline __m512
5743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5744 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5745 {
5746 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5747 (__v16sf) __W,
5748 (__mmask16) __U);
5749 }
5750
5751 extern __inline __m512
5752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5753 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5754 {
5755 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
5756 (__v16sf)
5757 _mm512_setzero_ps (),
5758 (__mmask16) __U);
5759 }
5760
5761 extern __inline void
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_storeu_ps (void *__P, __m512 __A)
5764 {
5765 *(__m512_u *)__P = __A;
5766 }
5767
5768 extern __inline void
5769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5770 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5771 {
5772 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
5773 (__mmask16) __U);
5774 }
5775
5776 extern __inline __m512i
5777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5779 {
5780 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5781 (__v8di) __W,
5782 (__mmask8) __U);
5783 }
5784
5785 extern __inline __m512i
5786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5787 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5788 {
5789 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
5790 (__v8di)
5791 _mm512_setzero_si512 (),
5792 (__mmask8) __U);
5793 }
5794
5795 extern __inline void
5796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5797 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5798 {
5799 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
5800 (__mmask8) __U);
5801 }
5802
5803 extern __inline __m512i
5804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5805 _mm512_loadu_si512 (void const *__P)
5806 {
5807 return *(__m512i_u *)__P;
5808 }
5809
5810 extern __inline __m512i
5811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5813 {
5814 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5815 (__v16si) __W,
5816 (__mmask16) __U);
5817 }
5818
5819 extern __inline __m512i
5820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5822 {
5823 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
5824 (__v16si)
5825 _mm512_setzero_si512 (),
5826 (__mmask16) __U);
5827 }
5828
5829 extern __inline void
5830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5831 _mm512_storeu_si512 (void *__P, __m512i __A)
5832 {
5833 *(__m512i_u *)__P = __A;
5834 }
5835
5836 extern __inline void
5837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5838 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5839 {
5840 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
5841 (__mmask16) __U);
5842 }
5843
5844 extern __inline __m512d
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5847 {
5848 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5849 (__v8di) __C,
5850 (__v8df)
5851 _mm512_undefined_pd (),
5852 (__mmask8) -1);
5853 }
5854
5855 extern __inline __m512d
5856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5857 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5858 {
5859 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5860 (__v8di) __C,
5861 (__v8df) __W,
5862 (__mmask8) __U);
5863 }
5864
5865 extern __inline __m512d
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5868 {
5869 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5870 (__v8di) __C,
5871 (__v8df)
5872 _mm512_setzero_pd (),
5873 (__mmask8) __U);
5874 }
5875
5876 extern __inline __m512
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5879 {
5880 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5881 (__v16si) __C,
5882 (__v16sf)
5883 _mm512_undefined_ps (),
5884 (__mmask16) -1);
5885 }
5886
5887 extern __inline __m512
5888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5890 {
5891 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5892 (__v16si) __C,
5893 (__v16sf) __W,
5894 (__mmask16) __U);
5895 }
5896
5897 extern __inline __m512
5898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5899 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5900 {
5901 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5902 (__v16si) __C,
5903 (__v16sf)
5904 _mm512_setzero_ps (),
5905 (__mmask16) __U);
5906 }
5907
5908 extern __inline __m512i
5909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5910 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5911 {
5912 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913 /* idx */ ,
5914 (__v8di) __A,
5915 (__v8di) __B,
5916 (__mmask8) -1);
5917 }
5918
5919 extern __inline __m512i
5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5922 __m512i __B)
5923 {
5924 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5925 /* idx */ ,
5926 (__v8di) __A,
5927 (__v8di) __B,
5928 (__mmask8) __U);
5929 }
5930
5931 extern __inline __m512i
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5934 __mmask8 __U, __m512i __B)
5935 {
5936 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5937 (__v8di) __I
5938 /* idx */ ,
5939 (__v8di) __B,
5940 (__mmask8) __U);
5941 }
5942
5943 extern __inline __m512i
5944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5946 __m512i __I, __m512i __B)
5947 {
5948 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5949 /* idx */ ,
5950 (__v8di) __A,
5951 (__v8di) __B,
5952 (__mmask8) __U);
5953 }
5954
5955 extern __inline __m512i
5956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5957 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5958 {
5959 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960 /* idx */ ,
5961 (__v16si) __A,
5962 (__v16si) __B,
5963 (__mmask16) -1);
5964 }
5965
5966 extern __inline __m512i
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5969 __m512i __I, __m512i __B)
5970 {
5971 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5972 /* idx */ ,
5973 (__v16si) __A,
5974 (__v16si) __B,
5975 (__mmask16) __U);
5976 }
5977
5978 extern __inline __m512i
5979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5981 __mmask16 __U, __m512i __B)
5982 {
5983 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5984 (__v16si) __I
5985 /* idx */ ,
5986 (__v16si) __B,
5987 (__mmask16) __U);
5988 }
5989
5990 extern __inline __m512i
5991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5993 __m512i __I, __m512i __B)
5994 {
5995 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5996 /* idx */ ,
5997 (__v16si) __A,
5998 (__v16si) __B,
5999 (__mmask16) __U);
6000 }
6001
6002 extern __inline __m512d
6003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6005 {
6006 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007 /* idx */ ,
6008 (__v8df) __A,
6009 (__v8df) __B,
6010 (__mmask8) -1);
6011 }
6012
6013 extern __inline __m512d
6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6016 __m512d __B)
6017 {
6018 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6019 /* idx */ ,
6020 (__v8df) __A,
6021 (__v8df) __B,
6022 (__mmask8) __U);
6023 }
6024
6025 extern __inline __m512d
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6028 __m512d __B)
6029 {
6030 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6031 (__v8di) __I
6032 /* idx */ ,
6033 (__v8df) __B,
6034 (__mmask8) __U);
6035 }
6036
6037 extern __inline __m512d
6038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6040 __m512d __B)
6041 {
6042 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6043 /* idx */ ,
6044 (__v8df) __A,
6045 (__v8df) __B,
6046 (__mmask8) __U);
6047 }
6048
6049 extern __inline __m512
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6052 {
6053 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6054 /* idx */ ,
6055 (__v16sf) __A,
6056 (__v16sf) __B,
6057 (__mmask16) -1);
6058 }
6059
6060 extern __inline __m512
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6063 {
6064 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6065 /* idx */ ,
6066 (__v16sf) __A,
6067 (__v16sf) __B,
6068 (__mmask16) __U);
6069 }
6070
6071 extern __inline __m512
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6074 __m512 __B)
6075 {
6076 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6077 (__v16si) __I
6078 /* idx */ ,
6079 (__v16sf) __B,
6080 (__mmask16) __U);
6081 }
6082
6083 extern __inline __m512
6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6086 __m512 __B)
6087 {
6088 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6089 /* idx */ ,
6090 (__v16sf) __A,
6091 (__v16sf) __B,
6092 (__mmask16) __U);
6093 }
6094
6095 #ifdef __OPTIMIZE__
6096 extern __inline __m512d
6097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098 _mm512_permute_pd (__m512d __X, const int __C)
6099 {
6100 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6101 (__v8df)
6102 _mm512_undefined_pd (),
6103 (__mmask8) -1);
6104 }
6105
6106 extern __inline __m512d
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6109 {
6110 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6111 (__v8df) __W,
6112 (__mmask8) __U);
6113 }
6114
6115 extern __inline __m512d
6116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6118 {
6119 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6120 (__v8df)
6121 _mm512_setzero_pd (),
6122 (__mmask8) __U);
6123 }
6124
6125 extern __inline __m512
6126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6127 _mm512_permute_ps (__m512 __X, const int __C)
6128 {
6129 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6130 (__v16sf)
6131 _mm512_undefined_ps (),
6132 (__mmask16) -1);
6133 }
6134
6135 extern __inline __m512
6136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6138 {
6139 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6140 (__v16sf) __W,
6141 (__mmask16) __U);
6142 }
6143
6144 extern __inline __m512
6145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6146 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6147 {
6148 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6149 (__v16sf)
6150 _mm512_setzero_ps (),
6151 (__mmask16) __U);
6152 }
6153 #else
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilpd512_mask on (X, C) with an undefined vector
   and an all-ones mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)(-1)))
6158
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilpd512_mask on (X, C) with W as the third
   operand and U as the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6163
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilpd512_mask on (X, C) with a zero vector as the
   third operand and U as the mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6168
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilps512_mask on (X, C) with an undefined vector
   and an all-ones mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)(-1)))
6173
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilps512_mask on (X, C) with W as the third
   operand and U as the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6178
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_vpermilps512_mask on (X, C) with a zero vector as the
   third operand and U as the mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6183 #endif
6184
6185 #ifdef __OPTIMIZE__
6186 extern __inline __m512i
6187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188 _mm512_permutex_epi64 (__m512i __X, const int __I)
6189 {
6190 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6191 (__v8di)
6192 _mm512_undefined_epi32 (),
6193 (__mmask8) (-1));
6194 }
6195
6196 extern __inline __m512i
6197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6199 __m512i __X, const int __I)
6200 {
6201 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6202 (__v8di) __W,
6203 (__mmask8) __M);
6204 }
6205
6206 extern __inline __m512i
6207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6208 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6209 {
6210 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6211 (__v8di)
6212 _mm512_setzero_si512 (),
6213 (__mmask8) __M);
6214 }
6215
6216 extern __inline __m512d
6217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218 _mm512_permutex_pd (__m512d __X, const int __M)
6219 {
6220 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6221 (__v8df)
6222 _mm512_undefined_pd (),
6223 (__mmask8) -1);
6224 }
6225
6226 extern __inline __m512d
6227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6229 {
6230 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6231 (__v8df) __W,
6232 (__mmask8) __U);
6233 }
6234
6235 extern __inline __m512d
6236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6238 {
6239 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6240 (__v8df)
6241 _mm512_setzero_pd (),
6242 (__mmask8) __U);
6243 }
6244 #else
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdf512_mask on (X, M) with an undefined vector and
   an all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6249
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdf512_mask on (X, M) with W as the third operand
   and U as the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6253
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdf512_mask on (X, M) with a zero vector as the
   third operand and U as the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6258
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdi512_mask on (X, I) with an undefined vector and
   an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_undefined_epi32 ()), \
     (__mmask8)(-1)))
6265
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdi512_mask on (X, I) with a zero vector as the
   third operand and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8)(M)))
6272
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_permdi512_mask on (X, I) with W as the third operand
   and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(M)))
6278 #endif
6279
6280 extern __inline __m512i
6281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6283 {
6284 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6285 (__v8di) __X,
6286 (__v8di)
6287 _mm512_setzero_si512 (),
6288 __M);
6289 }
6290
6291 extern __inline __m512i
6292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6293 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6294 {
6295 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296 (__v8di) __X,
6297 (__v8di)
6298 _mm512_undefined_epi32 (),
6299 (__mmask8) -1);
6300 }
6301
6302 extern __inline __m512i
6303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6305 __m512i __Y)
6306 {
6307 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6308 (__v8di) __X,
6309 (__v8di) __W,
6310 __M);
6311 }
6312
6313 extern __inline __m512i
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6316 {
6317 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6318 (__v16si) __X,
6319 (__v16si)
6320 _mm512_setzero_si512 (),
6321 __M);
6322 }
6323
6324 extern __inline __m512i
6325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6326 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6327 {
6328 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329 (__v16si) __X,
6330 (__v16si)
6331 _mm512_undefined_epi32 (),
6332 (__mmask16) -1);
6333 }
6334
6335 extern __inline __m512i
6336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6337 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6338 __m512i __Y)
6339 {
6340 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6341 (__v16si) __X,
6342 (__v16si) __W,
6343 __M);
6344 }
6345
6346 extern __inline __m512d
6347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6348 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6349 {
6350 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6351 (__v8di) __X,
6352 (__v8df)
6353 _mm512_undefined_pd (),
6354 (__mmask8) -1);
6355 }
6356
6357 extern __inline __m512d
6358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6359 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6360 {
6361 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6362 (__v8di) __X,
6363 (__v8df) __W,
6364 (__mmask8) __U);
6365 }
6366
6367 extern __inline __m512d
6368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6370 {
6371 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6372 (__v8di) __X,
6373 (__v8df)
6374 _mm512_setzero_pd (),
6375 (__mmask8) __U);
6376 }
6377
6378 extern __inline __m512
6379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6380 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6381 {
6382 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6383 (__v16si) __X,
6384 (__v16sf)
6385 _mm512_undefined_ps (),
6386 (__mmask16) -1);
6387 }
6388
6389 extern __inline __m512
6390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6391 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6392 {
6393 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6394 (__v16si) __X,
6395 (__v16sf) __W,
6396 (__mmask16) __U);
6397 }
6398
6399 extern __inline __m512
6400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6402 {
6403 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6404 (__v16si) __X,
6405 (__v16sf)
6406 _mm512_setzero_ps (),
6407 (__mmask16) __U);
6408 }
6409
6410 #ifdef __OPTIMIZE__
6411 extern __inline __m512
6412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6413 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6414 {
6415 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416 (__v16sf) __V, __imm,
6417 (__v16sf)
6418 _mm512_undefined_ps (),
6419 (__mmask16) -1);
6420 }
6421
6422 extern __inline __m512
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6425 __m512 __V, const int __imm)
6426 {
6427 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6428 (__v16sf) __V, __imm,
6429 (__v16sf) __W,
6430 (__mmask16) __U);
6431 }
6432
6433 extern __inline __m512
6434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6436 {
6437 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6438 (__v16sf) __V, __imm,
6439 (__v16sf)
6440 _mm512_setzero_ps (),
6441 (__mmask16) __U);
6442 }
6443
6444 extern __inline __m512d
6445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6447 {
6448 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449 (__v8df) __V, __imm,
6450 (__v8df)
6451 _mm512_undefined_pd (),
6452 (__mmask8) -1);
6453 }
6454
6455 extern __inline __m512d
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6458 __m512d __V, const int __imm)
6459 {
6460 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6461 (__v8df) __V, __imm,
6462 (__v8df) __W,
6463 (__mmask8) __U);
6464 }
6465
6466 extern __inline __m512d
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6469 const int __imm)
6470 {
6471 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6472 (__v8df) __V, __imm,
6473 (__v8df)
6474 _mm512_setzero_pd (),
6475 (__mmask8) __U);
6476 }
6477
6478 extern __inline __m512d
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6481 const int __imm, const int __R)
6482 {
6483 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484 (__v8df) __B,
6485 (__v8di) __C,
6486 __imm,
6487 (__mmask8) -1, __R);
6488 }
6489
6490 extern __inline __m512d
6491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6493 __m512i __C, const int __imm, const int __R)
6494 {
6495 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6496 (__v8df) __B,
6497 (__v8di) __C,
6498 __imm,
6499 (__mmask8) __U, __R);
6500 }
6501
6502 extern __inline __m512d
6503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6505 __m512i __C, const int __imm, const int __R)
6506 {
6507 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6508 (__v8df) __B,
6509 (__v8di) __C,
6510 __imm,
6511 (__mmask8) __U, __R);
6512 }
6513
6514 extern __inline __m512
6515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6517 const int __imm, const int __R)
6518 {
6519 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520 (__v16sf) __B,
6521 (__v16si) __C,
6522 __imm,
6523 (__mmask16) -1, __R);
6524 }
6525
6526 extern __inline __m512
6527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6529 __m512i __C, const int __imm, const int __R)
6530 {
6531 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6532 (__v16sf) __B,
6533 (__v16si) __C,
6534 __imm,
6535 (__mmask16) __U, __R);
6536 }
6537
6538 extern __inline __m512
6539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6541 __m512i __C, const int __imm, const int __R)
6542 {
6543 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6544 (__v16sf) __B,
6545 (__v16si) __C,
6546 __imm,
6547 (__mmask16) __U, __R);
6548 }
6549
6550 extern __inline __m128d
6551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6552 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6553 const int __imm, const int __R)
6554 {
6555 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6556 (__v2df) __B,
6557 (__v2di) __C, __imm,
6558 (__mmask8) -1, __R);
6559 }
6560
6561 extern __inline __m128d
6562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6563 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6564 __m128i __C, const int __imm, const int __R)
6565 {
6566 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6567 (__v2df) __B,
6568 (__v2di) __C, __imm,
6569 (__mmask8) __U, __R);
6570 }
6571
6572 extern __inline __m128d
6573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6575 __m128i __C, const int __imm, const int __R)
6576 {
6577 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6578 (__v2df) __B,
6579 (__v2di) __C,
6580 __imm,
6581 (__mmask8) __U, __R);
6582 }
6583
6584 extern __inline __m128
6585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6586 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6587 const int __imm, const int __R)
6588 {
6589 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6590 (__v4sf) __B,
6591 (__v4si) __C, __imm,
6592 (__mmask8) -1, __R);
6593 }
6594
6595 extern __inline __m128
6596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6597 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6598 __m128i __C, const int __imm, const int __R)
6599 {
6600 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6601 (__v4sf) __B,
6602 (__v4si) __C, __imm,
6603 (__mmask8) __U, __R);
6604 }
6605
6606 extern __inline __m128
6607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6608 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6609 __m128i __C, const int __imm, const int __R)
6610 {
6611 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6612 (__v4sf) __B,
6613 (__v4si) __C, __imm,
6614 (__mmask8) __U, __R);
6615 }
6616
6617 #else
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufpd512_mask on (X, Y, C) with an undefined vector
   and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6623
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufpd512_mask on (X, Y, C) with W as the fourth
   operand and U as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6629
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufpd512_mask on (X, Y, C) with a zero vector as the
   fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6635
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufps512_mask on (X, Y, C) with an undefined vector
   and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))
6641
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufps512_mask on (X, Y, C) with W as the fourth
   operand and U as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6647
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_shufps512_mask on (X, Y, C) with a zero vector as the
   fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6653
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_fixupimmpd512_mask on (X, Y, Z, C) with an all-ones
   mask and R as the final operand.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
6658
/* Macro form used when __OPTIMIZE__ is not defined:
   __builtin_ia32_fixupimmpd512_mask on (X, Y, Z, C) with U as the mask
   and R as the final operand.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6663
6664 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6665 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6666 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6667 (__mmask8)(U), (R)))
6668
/* Macro forms of the 512-bit single-precision fixupimm intrinsics with an
   explicit R argument.  X and Y are cast to __v16sf, Z to __v16si and C
   to int; R is passed through unchanged.  */

/* Unmasked: _mask builtin with an all-ones mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (-1), \
     (R)))

/* Masked: _mask builtin with mask U.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     (R)))

/* Zero-masked: _maskz builtin with mask U.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     (R)))
6683
/* Macro forms of the scalar double-precision fixupimm intrinsics with an
   explicit R argument.  X and Y are cast to __v2df, Z to __v2di and C to
   int; R is passed through unchanged.  */

/* Unmasked: _mask builtin with an all-ones mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))

/* Masked: _mask builtin with mask U.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))

/* Zero-masked: _maskz builtin with mask U.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6698
/* Macro forms of the scalar single-precision fixupimm intrinsics with an
   explicit R argument.  X and Y are cast to __v4sf, Z to __v4si and C to
   int; R is passed through unchanged.  */

/* Unmasked: _mask builtin with an all-ones mask.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))

/* Masked: _mask builtin with mask U.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))

/* Zero-masked: _maskz builtin with mask U.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6713 #endif
6714
6715 extern __inline __m512
6716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6717 _mm512_movehdup_ps (__m512 __A)
6718 {
6719 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6720 (__v16sf)
6721 _mm512_undefined_ps (),
6722 (__mmask16) -1);
6723 }
6724
6725 extern __inline __m512
6726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6728 {
6729 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6730 (__v16sf) __W,
6731 (__mmask16) __U);
6732 }
6733
6734 extern __inline __m512
6735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6736 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6737 {
6738 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6739 (__v16sf)
6740 _mm512_setzero_ps (),
6741 (__mmask16) __U);
6742 }
6743
6744 extern __inline __m512
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm512_moveldup_ps (__m512 __A)
6747 {
6748 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6749 (__v16sf)
6750 _mm512_undefined_ps (),
6751 (__mmask16) -1);
6752 }
6753
6754 extern __inline __m512
6755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6756 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6757 {
6758 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6759 (__v16sf) __W,
6760 (__mmask16) __U);
6761 }
6762
6763 extern __inline __m512
6764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6765 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6766 {
6767 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6768 (__v16sf)
6769 _mm512_setzero_ps (),
6770 (__mmask16) __U);
6771 }
6772
6773 extern __inline __m512i
6774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6775 _mm512_or_si512 (__m512i __A, __m512i __B)
6776 {
6777 return (__m512i) ((__v16su) __A | (__v16su) __B);
6778 }
6779
6780 extern __inline __m512i
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm512_or_epi32 (__m512i __A, __m512i __B)
6783 {
6784 return (__m512i) ((__v16su) __A | (__v16su) __B);
6785 }
6786
6787 extern __inline __m512i
6788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6789 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6790 {
6791 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6792 (__v16si) __B,
6793 (__v16si) __W,
6794 (__mmask16) __U);
6795 }
6796
6797 extern __inline __m512i
6798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6800 {
6801 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6802 (__v16si) __B,
6803 (__v16si)
6804 _mm512_setzero_si512 (),
6805 (__mmask16) __U);
6806 }
6807
6808 extern __inline __m512i
6809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6810 _mm512_or_epi64 (__m512i __A, __m512i __B)
6811 {
6812 return (__m512i) ((__v8du) __A | (__v8du) __B);
6813 }
6814
6815 extern __inline __m512i
6816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6817 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6818 {
6819 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6820 (__v8di) __B,
6821 (__v8di) __W,
6822 (__mmask8) __U);
6823 }
6824
6825 extern __inline __m512i
6826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6827 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6828 {
6829 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6830 (__v8di) __B,
6831 (__v8di)
6832 _mm512_setzero_si512 (),
6833 (__mmask8) __U);
6834 }
6835
6836 extern __inline __m512i
6837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6838 _mm512_xor_si512 (__m512i __A, __m512i __B)
6839 {
6840 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6841 }
6842
6843 extern __inline __m512i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6846 {
6847 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6848 }
6849
6850 extern __inline __m512i
6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6853 {
6854 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6855 (__v16si) __B,
6856 (__v16si) __W,
6857 (__mmask16) __U);
6858 }
6859
6860 extern __inline __m512i
6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6863 {
6864 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6865 (__v16si) __B,
6866 (__v16si)
6867 _mm512_setzero_si512 (),
6868 (__mmask16) __U);
6869 }
6870
6871 extern __inline __m512i
6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6874 {
6875 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6876 }
6877
6878 extern __inline __m512i
6879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6881 {
6882 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6883 (__v8di) __B,
6884 (__v8di) __W,
6885 (__mmask8) __U);
6886 }
6887
6888 extern __inline __m512i
6889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6891 {
6892 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6893 (__v8di) __B,
6894 (__v8di)
6895 _mm512_setzero_si512 (),
6896 (__mmask8) __U);
6897 }
6898
6899 #ifdef __OPTIMIZE__
6900 extern __inline __m512i
6901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6902 _mm512_rol_epi32 (__m512i __A, const int __B)
6903 {
6904 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6905 (__v16si)
6906 _mm512_undefined_epi32 (),
6907 (__mmask16) -1);
6908 }
6909
6910 extern __inline __m512i
6911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6912 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6913 {
6914 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6915 (__v16si) __W,
6916 (__mmask16) __U);
6917 }
6918
6919 extern __inline __m512i
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6922 {
6923 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6924 (__v16si)
6925 _mm512_setzero_si512 (),
6926 (__mmask16) __U);
6927 }
6928
6929 extern __inline __m512i
6930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6931 _mm512_ror_epi32 (__m512i __A, int __B)
6932 {
6933 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6934 (__v16si)
6935 _mm512_undefined_epi32 (),
6936 (__mmask16) -1);
6937 }
6938
6939 extern __inline __m512i
6940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6941 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6942 {
6943 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6944 (__v16si) __W,
6945 (__mmask16) __U);
6946 }
6947
6948 extern __inline __m512i
6949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6950 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6951 {
6952 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6953 (__v16si)
6954 _mm512_setzero_si512 (),
6955 (__mmask16) __U);
6956 }
6957
6958 extern __inline __m512i
6959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6960 _mm512_rol_epi64 (__m512i __A, const int __B)
6961 {
6962 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6963 (__v8di)
6964 _mm512_undefined_epi32 (),
6965 (__mmask8) -1);
6966 }
6967
6968 extern __inline __m512i
6969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6970 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6971 {
6972 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6973 (__v8di) __W,
6974 (__mmask8) __U);
6975 }
6976
6977 extern __inline __m512i
6978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6979 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6980 {
6981 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6982 (__v8di)
6983 _mm512_setzero_si512 (),
6984 (__mmask8) __U);
6985 }
6986
6987 extern __inline __m512i
6988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6989 _mm512_ror_epi64 (__m512i __A, int __B)
6990 {
6991 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6992 (__v8di)
6993 _mm512_undefined_epi32 (),
6994 (__mmask8) -1);
6995 }
6996
6997 extern __inline __m512i
6998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7000 {
7001 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7002 (__v8di) __W,
7003 (__mmask8) __U);
7004 }
7005
7006 extern __inline __m512i
7007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7008 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7009 {
7010 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7011 (__v8di)
7012 _mm512_setzero_si512 (),
7013 (__mmask8) __U);
7014 }
7015
7016 #else
/* Macro forms of the 32-bit rol intrinsics, used when __OPTIMIZE__ is not
   defined.  A is cast to __v16si and B to int before being handed to
   __builtin_ia32_prold512_mask.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro forms of the 32-bit ror intrinsics, used when __OPTIMIZE__ is not
   defined.  A is cast to __v16si and B to int before being handed to
   __builtin_ia32_prord512_mask.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro forms of the 64-bit rol intrinsics, used when __OPTIMIZE__ is not
   defined.  A is cast to __v8di and B to int before being handed to
   __builtin_ia32_prolq512_mask.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7062
/* Macro forms of the 64-bit ror intrinsics, used when __OPTIMIZE__ is not
   defined.  A is cast to __v8di and B to int before being handed to
   __builtin_ia32_prorq512_mask.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7078 #endif
7079
7080 extern __inline __m512i
7081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7082 _mm512_and_si512 (__m512i __A, __m512i __B)
7083 {
7084 return (__m512i) ((__v16su) __A & (__v16su) __B);
7085 }
7086
7087 extern __inline __m512i
7088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089 _mm512_and_epi32 (__m512i __A, __m512i __B)
7090 {
7091 return (__m512i) ((__v16su) __A & (__v16su) __B);
7092 }
7093
7094 extern __inline __m512i
7095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7097 {
7098 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7099 (__v16si) __B,
7100 (__v16si) __W,
7101 (__mmask16) __U);
7102 }
7103
7104 extern __inline __m512i
7105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7107 {
7108 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7109 (__v16si) __B,
7110 (__v16si)
7111 _mm512_setzero_si512 (),
7112 (__mmask16) __U);
7113 }
7114
7115 extern __inline __m512i
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm512_and_epi64 (__m512i __A, __m512i __B)
7118 {
7119 return (__m512i) ((__v8du) __A & (__v8du) __B);
7120 }
7121
7122 extern __inline __m512i
7123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7125 {
7126 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7127 (__v8di) __B,
7128 (__v8di) __W, __U);
7129 }
7130
7131 extern __inline __m512i
7132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7133 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7134 {
7135 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7136 (__v8di) __B,
7137 (__v8di)
7138 _mm512_setzero_pd (),
7139 __U);
7140 }
7141
7142 extern __inline __m512i
7143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7144 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7145 {
7146 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7147 (__v16si) __B,
7148 (__v16si)
7149 _mm512_undefined_epi32 (),
7150 (__mmask16) -1);
7151 }
7152
7153 extern __inline __m512i
7154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7155 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7156 {
7157 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7158 (__v16si) __B,
7159 (__v16si)
7160 _mm512_undefined_epi32 (),
7161 (__mmask16) -1);
7162 }
7163
7164 extern __inline __m512i
7165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7167 {
7168 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7169 (__v16si) __B,
7170 (__v16si) __W,
7171 (__mmask16) __U);
7172 }
7173
7174 extern __inline __m512i
7175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7177 {
7178 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7179 (__v16si) __B,
7180 (__v16si)
7181 _mm512_setzero_si512 (),
7182 (__mmask16) __U);
7183 }
7184
7185 extern __inline __m512i
7186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7188 {
7189 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7190 (__v8di) __B,
7191 (__v8di)
7192 _mm512_undefined_epi32 (),
7193 (__mmask8) -1);
7194 }
7195
7196 extern __inline __m512i
7197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7198 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7199 {
7200 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7201 (__v8di) __B,
7202 (__v8di) __W, __U);
7203 }
7204
7205 extern __inline __m512i
7206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7207 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7208 {
7209 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7210 (__v8di) __B,
7211 (__v8di)
7212 _mm512_setzero_pd (),
7213 __U);
7214 }
7215
7216 extern __inline __mmask16
7217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7218 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7219 {
7220 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7221 (__v16si) __B,
7222 (__mmask16) -1);
7223 }
7224
7225 extern __inline __mmask16
7226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7227 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7228 {
7229 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7230 (__v16si) __B, __U);
7231 }
7232
7233 extern __inline __mmask8
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7236 {
7237 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7238 (__v8di) __B,
7239 (__mmask8) -1);
7240 }
7241
7242 extern __inline __mmask8
7243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7245 {
7246 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7247 }
7248
7249 extern __inline __mmask16
7250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7252 {
7253 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7254 (__v16si) __B,
7255 (__mmask16) -1);
7256 }
7257
7258 extern __inline __mmask16
7259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7260 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7261 {
7262 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7263 (__v16si) __B, __U);
7264 }
7265
7266 extern __inline __mmask8
7267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7268 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7269 {
7270 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7271 (__v8di) __B,
7272 (__mmask8) -1);
7273 }
7274
7275 extern __inline __mmask8
7276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7277 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7278 {
7279 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7280 (__v8di) __B, __U);
7281 }
7282
7283 extern __inline __m512
7284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7285 _mm512_abs_ps (__m512 __A)
7286 {
7287 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7288 _mm512_set1_epi32 (0x7fffffff));
7289 }
7290
7291 extern __inline __m512
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7294 {
7295 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7296 _mm512_set1_epi32 (0x7fffffff));
7297 }
7298
7299 extern __inline __m512d
7300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7301 _mm512_abs_pd (__m512 __A)
7302 {
7303 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7304 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7305 }
7306
7307 extern __inline __m512d
7308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7309 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A)
7310 {
7311 return (__m512d)
7312 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7313 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7314 }
7315
7316 extern __inline __m512i
7317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7318 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7319 {
7320 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7321 (__v16si) __B,
7322 (__v16si)
7323 _mm512_undefined_epi32 (),
7324 (__mmask16) -1);
7325 }
7326
7327 extern __inline __m512i
7328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7329 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7330 __m512i __B)
7331 {
7332 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7333 (__v16si) __B,
7334 (__v16si) __W,
7335 (__mmask16) __U);
7336 }
7337
7338 extern __inline __m512i
7339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7340 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7341 {
7342 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7343 (__v16si) __B,
7344 (__v16si)
7345 _mm512_setzero_si512 (),
7346 (__mmask16) __U);
7347 }
7348
7349 extern __inline __m512i
7350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7351 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7352 {
7353 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7354 (__v8di) __B,
7355 (__v8di)
7356 _mm512_undefined_epi32 (),
7357 (__mmask8) -1);
7358 }
7359
7360 extern __inline __m512i
7361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7362 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7363 {
7364 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7365 (__v8di) __B,
7366 (__v8di) __W,
7367 (__mmask8) __U);
7368 }
7369
7370 extern __inline __m512i
7371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7373 {
7374 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7375 (__v8di) __B,
7376 (__v8di)
7377 _mm512_setzero_si512 (),
7378 (__mmask8) __U);
7379 }
7380
7381 extern __inline __m512i
7382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7383 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7384 {
7385 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7386 (__v16si) __B,
7387 (__v16si)
7388 _mm512_undefined_epi32 (),
7389 (__mmask16) -1);
7390 }
7391
7392 extern __inline __m512i
7393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7394 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7395 __m512i __B)
7396 {
7397 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7398 (__v16si) __B,
7399 (__v16si) __W,
7400 (__mmask16) __U);
7401 }
7402
7403 extern __inline __m512i
7404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7406 {
7407 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7408 (__v16si) __B,
7409 (__v16si)
7410 _mm512_setzero_si512 (),
7411 (__mmask16) __U);
7412 }
7413
7414 extern __inline __m512i
7415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7416 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7417 {
7418 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7419 (__v8di) __B,
7420 (__v8di)
7421 _mm512_undefined_epi32 (),
7422 (__mmask8) -1);
7423 }
7424
7425 extern __inline __m512i
7426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7427 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7428 {
7429 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7430 (__v8di) __B,
7431 (__v8di) __W,
7432 (__mmask8) __U);
7433 }
7434
7435 extern __inline __m512i
7436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7437 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7438 {
7439 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7440 (__v8di) __B,
7441 (__v8di)
7442 _mm512_setzero_si512 (),
7443 (__mmask8) __U);
7444 }
7445
7446 #ifdef __x86_64__
7447 #ifdef __OPTIMIZE__
7448 extern __inline unsigned long long
7449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7451 {
7452 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7453 }
7454
7455 extern __inline long long
7456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7457 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7458 {
7459 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7460 }
7461
7462 extern __inline long long
7463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7464 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7465 {
7466 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7467 }
7468
7469 extern __inline unsigned long long
7470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7472 {
7473 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7474 }
7475
7476 extern __inline long long
7477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7478 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7479 {
7480 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7481 }
7482
7483 extern __inline long long
7484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7485 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7486 {
7487 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7488 }
7489 #else
/* Macro forms of the scalar single-precision to 64-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  A and B are
   handed to the builtin as written; only the result is cast.  */

#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
7507 #endif
7508 #endif
7509
7510 #ifdef __OPTIMIZE__
7511 extern __inline unsigned
7512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7513 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7514 {
7515 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7516 }
7517
7518 extern __inline int
7519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7520 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7521 {
7522 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7523 }
7524
7525 extern __inline int
7526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7528 {
7529 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7530 }
7531
7532 extern __inline unsigned
7533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7534 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7535 {
7536 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7537 }
7538
7539 extern __inline int
7540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7542 {
7543 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7544 }
7545
7546 extern __inline int
7547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7549 {
7550 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7551 }
7552 #else
/* Macro forms of the scalar single-precision to 32-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  A and B are
   handed to the builtin as written; only the result is cast.  */

#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 (A, B))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 (A, B))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))
7570 #endif
7571
7572 #ifdef __x86_64__
7573 #ifdef __OPTIMIZE__
7574 extern __inline unsigned long long
7575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7577 {
7578 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7579 }
7580
7581 extern __inline long long
7582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7583 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7584 {
7585 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7586 }
7587
7588 extern __inline long long
7589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7590 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7591 {
7592 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7593 }
7594
7595 extern __inline unsigned long long
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7598 {
7599 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7600 }
7601
7602 extern __inline long long
7603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7605 {
7606 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7607 }
7608
7609 extern __inline long long
7610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7611 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7612 {
7613 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7614 }
7615 #else
/* Macro forms of the scalar double-precision to 64-bit integer
   conversions, used when __OPTIMIZE__ is not defined.  A and B are
   handed to the builtin as written; only the result is cast.  */

#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 (A, B))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 (A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))
7633 #endif
7634 #endif
7635
7636 #ifdef __OPTIMIZE__
7637 extern __inline unsigned
7638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7640 {
7641 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7642 }
7643
7644 extern __inline int
7645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7646 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7647 {
7648 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7649 }
7650
7651 extern __inline int
7652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7653 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7654 {
7655 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7656 }
7657
7658 extern __inline unsigned
7659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7661 {
7662 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7663 }
7664
7665 extern __inline int
7666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7667 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7668 {
7669 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7670 }
7671
7672 extern __inline int
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7675 {
7676 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7677 }
7678 #else
/* Macro forms of the 32-bit scalar-double conversions, used when
   __OPTIMIZE__ is not defined.  Each expands to the same builtin call
   as the inline function of the same name; the _si32 and _i32
   spellings share one builtin.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7696 #endif
7697
7698 extern __inline __m512d
7699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7700 _mm512_movedup_pd (__m512d __A)
7701 {
7702 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7703 (__v8df)
7704 _mm512_undefined_pd (),
7705 (__mmask8) -1);
7706 }
7707
7708 extern __inline __m512d
7709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7710 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7711 {
7712 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7713 (__v8df) __W,
7714 (__mmask8) __U);
7715 }
7716
7717 extern __inline __m512d
7718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7720 {
7721 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7722 (__v8df)
7723 _mm512_setzero_pd (),
7724 (__mmask8) __U);
7725 }
7726
7727 extern __inline __m512d
7728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7729 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7730 {
7731 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7732 (__v8df) __B,
7733 (__v8df)
7734 _mm512_undefined_pd (),
7735 (__mmask8) -1);
7736 }
7737
7738 extern __inline __m512d
7739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7740 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7741 {
7742 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7743 (__v8df) __B,
7744 (__v8df) __W,
7745 (__mmask8) __U);
7746 }
7747
7748 extern __inline __m512d
7749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7750 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7751 {
7752 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7753 (__v8df) __B,
7754 (__v8df)
7755 _mm512_setzero_pd (),
7756 (__mmask8) __U);
7757 }
7758
7759 extern __inline __m512d
7760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7761 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7762 {
7763 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7764 (__v8df) __B,
7765 (__v8df)
7766 _mm512_undefined_pd (),
7767 (__mmask8) -1);
7768 }
7769
7770 extern __inline __m512d
7771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7772 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7773 {
7774 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7775 (__v8df) __B,
7776 (__v8df) __W,
7777 (__mmask8) __U);
7778 }
7779
7780 extern __inline __m512d
7781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7782 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7783 {
7784 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7785 (__v8df) __B,
7786 (__v8df)
7787 _mm512_setzero_pd (),
7788 (__mmask8) __U);
7789 }
7790
7791 extern __inline __m512
7792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7794 {
7795 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7796 (__v16sf) __B,
7797 (__v16sf)
7798 _mm512_undefined_ps (),
7799 (__mmask16) -1);
7800 }
7801
7802 extern __inline __m512
7803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7805 {
7806 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7807 (__v16sf) __B,
7808 (__v16sf) __W,
7809 (__mmask16) __U);
7810 }
7811
7812 extern __inline __m512
7813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7814 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7815 {
7816 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7817 (__v16sf) __B,
7818 (__v16sf)
7819 _mm512_setzero_ps (),
7820 (__mmask16) __U);
7821 }
7822
7823 #ifdef __OPTIMIZE__
7824 extern __inline __m512d
7825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7826 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7827 {
7828 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7829 (__v8df)
7830 _mm512_undefined_pd (),
7831 (__mmask8) -1, __R);
7832 }
7833
7834 extern __inline __m512d
7835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7836 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7837 const int __R)
7838 {
7839 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7840 (__v8df) __W,
7841 (__mmask8) __U, __R);
7842 }
7843
7844 extern __inline __m512d
7845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7846 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7847 {
7848 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7849 (__v8df)
7850 _mm512_setzero_pd (),
7851 (__mmask8) __U, __R);
7852 }
7853
7854 extern __inline __m512
7855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7856 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7857 {
7858 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7859 (__v16sf)
7860 _mm512_undefined_ps (),
7861 (__mmask16) -1, __R);
7862 }
7863
7864 extern __inline __m512
7865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7866 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7867 const int __R)
7868 {
7869 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7870 (__v16sf) __W,
7871 (__mmask16) __U, __R);
7872 }
7873
7874 extern __inline __m512
7875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7876 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7877 {
7878 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7879 (__v16sf)
7880 _mm512_setzero_ps (),
7881 (__mmask16) __U, __R);
7882 }
7883
7884 extern __inline __m256i
7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7887 {
7888 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7889 __I,
7890 (__v16hi)
7891 _mm256_undefined_si256 (),
7892 -1);
7893 }
7894
7895 extern __inline __m256i
7896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7897 _mm512_cvtps_ph (__m512 __A, const int __I)
7898 {
7899 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7900 __I,
7901 (__v16hi)
7902 _mm256_undefined_si256 (),
7903 -1);
7904 }
7905
7906 extern __inline __m256i
7907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7908 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7909 const int __I)
7910 {
7911 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7912 __I,
7913 (__v16hi) __U,
7914 (__mmask16) __W);
7915 }
7916
7917 extern __inline __m256i
7918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7919 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7920 {
7921 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7922 __I,
7923 (__v16hi) __U,
7924 (__mmask16) __W);
7925 }
7926
7927 extern __inline __m256i
7928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7929 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7930 {
7931 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7932 __I,
7933 (__v16hi)
7934 _mm256_setzero_si256 (),
7935 (__mmask16) __W);
7936 }
7937
7938 extern __inline __m256i
7939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7940 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7941 {
7942 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7943 __I,
7944 (__v16hi)
7945 _mm256_setzero_si256 (),
7946 (__mmask16) __W);
7947 }
7948 #else
/* Macro forms of the ps->pd and ph->ps conversions, used when
   __OPTIMIZE__ is not defined.  Each expands to the same builtin call
   as the inline function of the same name.

   Every expansion is wrapped in an outer pair of parentheses.  Without
   them, a postfix operator written after the macro call (for example
   a vector subscript) would bind to the builtin call first and the
   leading cast would then be applied to the subscripted element rather
   than to the whole result, unlike the inline-function forms.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_setzero_ps (), (U), (B)))
7966
/* Macro forms of the ps->ph conversions, used when __OPTIMIZE__ is not
   defined.  Each expands to the same builtin call as the inline
   function of the same name; the cvt_round and cvt spellings are
   identical.

   The A argument is parenthesized before the (__v16sf)(__m512) casts
   are applied.  Without the parentheses the casts bind only to the
   first operand of a compound argument (for example "c ? x : y"),
   which either fails to compile or converts the wrong operand.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi)(__m256i) (U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi)(__m256i) (U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7985 #endif
7986
7987 #ifdef __OPTIMIZE__
7988 extern __inline __m256
7989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7990 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7991 {
7992 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7993 (__v8sf)
7994 _mm256_undefined_ps (),
7995 (__mmask8) -1, __R);
7996 }
7997
7998 extern __inline __m256
7999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8001 const int __R)
8002 {
8003 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8004 (__v8sf) __W,
8005 (__mmask8) __U, __R);
8006 }
8007
8008 extern __inline __m256
8009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8011 {
8012 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8013 (__v8sf)
8014 _mm256_setzero_ps (),
8015 (__mmask8) __U, __R);
8016 }
8017
8018 extern __inline __m128
8019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8020 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8021 {
8022 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8023 (__v2df) __B,
8024 __R);
8025 }
8026
8027 extern __inline __m128d
8028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8029 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8030 {
8031 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8032 (__v4sf) __B,
8033 __R);
8034 }
8035 #else
/* Macro forms of the pd->ps, sd->ss and ss->sd conversions, used when
   __OPTIMIZE__ is not defined.  Each expands to the same builtin call
   as the inline function of the same name.

   Every expansion is wrapped in an outer pair of parentheses.  Without
   them, a postfix operator written after the macro call (for example
   a vector subscript) would bind to the builtin call first and the
   leading cast would then be applied to the subscripted element rather
   than to the whole result, unlike the inline-function forms.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
8050 #endif
8051
8052 extern __inline void
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8055 {
8056 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8057 }
8058
8059 extern __inline void
8060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8061 _mm512_stream_ps (float *__P, __m512 __A)
8062 {
8063 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8064 }
8065
8066 extern __inline void
8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068 _mm512_stream_pd (double *__P, __m512d __A)
8069 {
8070 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8071 }
8072
8073 extern __inline __m512i
8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075 _mm512_stream_load_si512 (void *__P)
8076 {
8077 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8078 }
8079
/* Constants for mantissa extraction.  The enumerators are numbered
   0..3 in declaration order; the getmant intrinsics OR this value into
   the low bits of their immediate operand.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,     /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,    /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,    /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8088
/* Sign-control constants for mantissa extraction.  The enumerators are
   numbered 0..2 in declaration order; the getmant intrinsics shift this
   value left by two before ORing it into their immediate operand.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,   /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0 */
  _MM_MANT_SIGN_nan = 2    /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8095
8096 #ifdef __OPTIMIZE__
8097 extern __inline __m128
8098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8099 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8100 {
8101 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8102 (__v4sf) __B,
8103 __R);
8104 }
8105
8106 extern __inline __m128d
8107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8108 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8109 {
8110 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8111 (__v2df) __B,
8112 __R);
8113 }
8114
8115 extern __inline __m512
8116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117 _mm512_getexp_round_ps (__m512 __A, const int __R)
8118 {
8119 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8120 (__v16sf)
8121 _mm512_undefined_ps (),
8122 (__mmask16) -1, __R);
8123 }
8124
8125 extern __inline __m512
8126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8128 const int __R)
8129 {
8130 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8131 (__v16sf) __W,
8132 (__mmask16) __U, __R);
8133 }
8134
8135 extern __inline __m512
8136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8137 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8138 {
8139 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8140 (__v16sf)
8141 _mm512_setzero_ps (),
8142 (__mmask16) __U, __R);
8143 }
8144
8145 extern __inline __m512d
8146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8147 _mm512_getexp_round_pd (__m512d __A, const int __R)
8148 {
8149 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8150 (__v8df)
8151 _mm512_undefined_pd (),
8152 (__mmask8) -1, __R);
8153 }
8154
8155 extern __inline __m512d
8156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8157 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8158 const int __R)
8159 {
8160 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8161 (__v8df) __W,
8162 (__mmask8) __U, __R);
8163 }
8164
8165 extern __inline __m512d
8166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8167 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8168 {
8169 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8170 (__v8df)
8171 _mm512_setzero_pd (),
8172 (__mmask8) __U, __R);
8173 }
8174
8175 extern __inline __m512d
8176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8177 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8178 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8179 {
8180 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8181 (__C << 2) | __B,
8182 _mm512_undefined_pd (),
8183 (__mmask8) -1, __R);
8184 }
8185
8186 extern __inline __m512d
8187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8188 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8189 _MM_MANTISSA_NORM_ENUM __B,
8190 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8191 {
8192 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8193 (__C << 2) | __B,
8194 (__v8df) __W, __U,
8195 __R);
8196 }
8197
8198 extern __inline __m512d
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8201 _MM_MANTISSA_NORM_ENUM __B,
8202 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8203 {
8204 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8205 (__C << 2) | __B,
8206 (__v8df)
8207 _mm512_setzero_pd (),
8208 __U, __R);
8209 }
8210
8211 extern __inline __m512
8212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8213 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8214 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8215 {
8216 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8217 (__C << 2) | __B,
8218 _mm512_undefined_ps (),
8219 (__mmask16) -1, __R);
8220 }
8221
8222 extern __inline __m512
8223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8224 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8225 _MM_MANTISSA_NORM_ENUM __B,
8226 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8227 {
8228 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8229 (__C << 2) | __B,
8230 (__v16sf) __W, __U,
8231 __R);
8232 }
8233
8234 extern __inline __m512
8235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8236 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8237 _MM_MANTISSA_NORM_ENUM __B,
8238 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8239 {
8240 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8241 (__C << 2) | __B,
8242 (__v16sf)
8243 _mm512_setzero_ps (),
8244 __U, __R);
8245 }
8246
8247 extern __inline __m128d
8248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8250 _MM_MANTISSA_NORM_ENUM __C,
8251 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8252 {
8253 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8254 (__v2df) __B,
8255 (__D << 2) | __C,
8256 __R);
8257 }
8258
8259 extern __inline __m128
8260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8261 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8262 _MM_MANTISSA_NORM_ENUM __C,
8263 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8264 {
8265 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8266 (__v4sf) __B,
8267 (__D << 2) | __C,
8268 __R);
8269 }
8270
8271 #else
/* Macro forms of the 512-bit getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  Argument order of the builtin is:
   source vector, control immediate ((C) << 2 | (B)), pass-through
   vector, mask, R.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df)(__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U), \
     (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     (R)))

#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf)(__m512) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf)(__m512) (W), \
     (__mmask16) (U), \
     (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     (R)))
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__
   is not defined.  The control immediate is C in bits 0-1 combined
   with D shifted left by two.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df)(__m128d) (X), \
     (__v2df)(__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf)(__m128) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (R)))
8323
/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__
   is not defined; same builtin calls as the inline functions.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128) (A), \
                                              (__v4sf)(__m128) (B), \
                                              (R)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d) (A), \
                                               (__v2df)(__m128d) (B), \
                                               (R)))
8329
/* Macro forms of the 512-bit getexp intrinsics, used when __OPTIMIZE__
   is not defined.  Argument order of the builtin is: source vector,
   pass-through vector, mask, R.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512) (A), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512) (A), \
     (__v16sf)(__m512) (W), \
     (__mmask16) (U), \
     (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512) (A), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     (R)))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d) (A), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d) (A), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U), \
     (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d) (A), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     (R)))
8353 #endif
8354
8355 #ifdef __OPTIMIZE__
8356 extern __inline __m512
8357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8358 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8359 {
8360 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8361 (__v16sf)
8362 _mm512_undefined_ps (),
8363 -1, __R);
8364 }
8365
8366 extern __inline __m512
8367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8368 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8369 const int __imm, const int __R)
8370 {
8371 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8372 (__v16sf) __A,
8373 (__mmask16) __B, __R);
8374 }
8375
8376 extern __inline __m512
8377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8378 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8379 const int __imm, const int __R)
8380 {
8381 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8382 __imm,
8383 (__v16sf)
8384 _mm512_setzero_ps (),
8385 (__mmask16) __A, __R);
8386 }
8387
8388 extern __inline __m512d
8389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8391 {
8392 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8393 (__v8df)
8394 _mm512_undefined_pd (),
8395 -1, __R);
8396 }
8397
8398 extern __inline __m512d
8399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8401 __m512d __C, const int __imm, const int __R)
8402 {
8403 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8404 (__v8df) __A,
8405 (__mmask8) __B, __R);
8406 }
8407
8408 extern __inline __m512d
8409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8410 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8411 const int __imm, const int __R)
8412 {
8413 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8414 __imm,
8415 (__v8df)
8416 _mm512_setzero_pd (),
8417 (__mmask8) __A, __R);
8418 }
8419
8420 extern __inline __m128
8421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8423 {
8424 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8425 (__v4sf) __B, __imm, __R);
8426 }
8427
8428 extern __inline __m128d
8429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8430 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8431 const int __R)
8432 {
8433 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8434 (__v2df) __B, __imm, __R);
8435 }
8436
8437 #else
/* Macro forms of the single-precision roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  Argument order of the builtin is:
   source vector, scale immediate, pass-through vector, mask, R.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512) (A), \
     (int) (B), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) (-1), \
     (R)))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512) (C), \
     (int) (D), \
     (__v16sf)(__m512) (A), \
     (__mmask16) (B), \
     (R)))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512) (B), \
     (int) (C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (A), \
     (R)))
/* Macro forms of the double-precision roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  Argument order of the builtin is:
   source vector, scale immediate, pass-through vector, mask, R.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d) (A), \
     (int) (B), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) (-1), \
     (R)))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d) (C), \
     (int) (D), \
     (__v8df)(__m512d) (A), \
     (__mmask8) (B), \
     (R)))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d) (B), \
     (int) (C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (A), \
     (R)))
/* Macro forms of the scalar roundscale intrinsics, used when
   __OPTIMIZE__ is not defined; same builtin calls as the inline
   functions.  */
#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
     (__v4sf)(__m128) (A), \
     (__v4sf)(__m128) (B), \
     (int) (C), \
     (R)))
#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
     (__v2df)(__m128d) (A), \
     (__v2df)(__m128d) (B), \
     (int) (C), \
     (R)))
8470 #endif
8471
8472 extern __inline __m512
8473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8474 _mm512_floor_ps (__m512 __A)
8475 {
8476 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8477 _MM_FROUND_FLOOR,
8478 (__v16sf) __A, -1,
8479 _MM_FROUND_CUR_DIRECTION);
8480 }
8481
8482 extern __inline __m512d
8483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8484 _mm512_floor_pd (__m512d __A)
8485 {
8486 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8487 _MM_FROUND_FLOOR,
8488 (__v8df) __A, -1,
8489 _MM_FROUND_CUR_DIRECTION);
8490 }
8491
8492 extern __inline __m512
8493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494 _mm512_ceil_ps (__m512 __A)
8495 {
8496 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8497 _MM_FROUND_CEIL,
8498 (__v16sf) __A, -1,
8499 _MM_FROUND_CUR_DIRECTION);
8500 }
8501
8502 extern __inline __m512d
8503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8504 _mm512_ceil_pd (__m512d __A)
8505 {
8506 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8507 _MM_FROUND_CEIL,
8508 (__v8df) __A, -1,
8509 _MM_FROUND_CUR_DIRECTION);
8510 }
8511
8512 extern __inline __m512
8513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8514 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8515 {
8516 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8517 _MM_FROUND_FLOOR,
8518 (__v16sf) __W, __U,
8519 _MM_FROUND_CUR_DIRECTION);
8520 }
8521
8522 extern __inline __m512d
8523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8525 {
8526 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8527 _MM_FROUND_FLOOR,
8528 (__v8df) __W, __U,
8529 _MM_FROUND_CUR_DIRECTION);
8530 }
8531
8532 extern __inline __m512
8533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8534 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8535 {
8536 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8537 _MM_FROUND_CEIL,
8538 (__v16sf) __W, __U,
8539 _MM_FROUND_CUR_DIRECTION);
8540 }
8541
8542 extern __inline __m512d
8543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8545 {
8546 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8547 _MM_FROUND_CEIL,
8548 (__v8df) __W, __U,
8549 _MM_FROUND_CUR_DIRECTION);
8550 }
8551
8552 #ifdef __OPTIMIZE__
8553 extern __inline __m512i
8554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8556 {
8557 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8558 (__v16si) __B, __imm,
8559 (__v16si)
8560 _mm512_undefined_epi32 (),
8561 (__mmask16) -1);
8562 }
8563
8564 extern __inline __m512i
8565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8567 __m512i __B, const int __imm)
8568 {
8569 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8570 (__v16si) __B, __imm,
8571 (__v16si) __W,
8572 (__mmask16) __U);
8573 }
8574
8575 extern __inline __m512i
8576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8578 const int __imm)
8579 {
8580 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8581 (__v16si) __B, __imm,
8582 (__v16si)
8583 _mm512_setzero_si512 (),
8584 (__mmask16) __U);
8585 }
8586
8587 extern __inline __m512i
8588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8590 {
8591 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8592 (__v8di) __B, __imm,
8593 (__v8di)
8594 _mm512_undefined_epi32 (),
8595 (__mmask8) -1);
8596 }
8597
8598 extern __inline __m512i
8599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8600 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8601 __m512i __B, const int __imm)
8602 {
8603 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8604 (__v8di) __B, __imm,
8605 (__v8di) __W,
8606 (__mmask8) __U);
8607 }
8608
8609 extern __inline __m512i
8610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8611 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8612 const int __imm)
8613 {
8614 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8615 (__v8di) __B, __imm,
8616 (__v8di)
8617 _mm512_setzero_si512 (),
8618 (__mmask8) __U);
8619 }
8620 #else
/* Macro form of _mm512_alignr_epi32 used when __OPTIMIZE__ is not
   defined (presumably so that C reaches the builtin as a literal).  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) -1))
8625
/* Macro form of _mm512_mask_alignr_epi32 used when __OPTIMIZE__ is not
   defined; W is the pass-through vector and U the lane mask.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
8630
/* Macro form of _mm512_maskz_alignr_epi32 used when __OPTIMIZE__ is not
   defined; the pass-through vector is all zeros.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
8635
/* Macro form of _mm512_alignr_epi64 used when __OPTIMIZE__ is not
   defined; all eight lanes are selected.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) -1))
8640
/* Macro form of _mm512_mask_alignr_epi64 used when __OPTIMIZE__ is not
   defined; W is the pass-through vector and U the lane mask.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
8644
/* Macro form of _mm512_maskz_alignr_epi64 used when __OPTIMIZE__ is not
   defined; the pass-through vector is all zeros.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
8649 #endif
8650
8651 extern __inline __mmask16
8652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8653 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8654 {
8655 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8656 (__v16si) __B,
8657 (__mmask16) -1);
8658 }
8659
8660 extern __inline __mmask16
8661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8662 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8663 {
8664 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8665 (__v16si) __B, __U);
8666 }
8667
8668 extern __inline __mmask8
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8671 {
8672 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8673 (__v8di) __B, __U);
8674 }
8675
8676 extern __inline __mmask8
8677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8678 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8679 {
8680 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8681 (__v8di) __B,
8682 (__mmask8) -1);
8683 }
8684
8685 extern __inline __mmask16
8686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8687 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8688 {
8689 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8690 (__v16si) __B,
8691 (__mmask16) -1);
8692 }
8693
8694 extern __inline __mmask16
8695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8696 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8697 {
8698 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8699 (__v16si) __B, __U);
8700 }
8701
8702 extern __inline __mmask8
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8705 {
8706 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8707 (__v8di) __B, __U);
8708 }
8709
8710 extern __inline __mmask8
8711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8713 {
8714 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8715 (__v8di) __B,
8716 (__mmask8) -1);
8717 }
8718
8719 extern __inline __mmask16
8720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8722 {
8723 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8724 (__v16si) __Y, 5,
8725 (__mmask16) -1);
8726 }
8727
8728 extern __inline __mmask16
8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8731 {
8732 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8733 (__v16si) __Y, 5,
8734 (__mmask16) __M);
8735 }
8736
8737 extern __inline __mmask16
8738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8740 {
8741 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8742 (__v16si) __Y, 5,
8743 (__mmask16) __M);
8744 }
8745
8746 extern __inline __mmask16
8747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8749 {
8750 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8751 (__v16si) __Y, 5,
8752 (__mmask16) -1);
8753 }
8754
8755 extern __inline __mmask8
8756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8758 {
8759 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8760 (__v8di) __Y, 5,
8761 (__mmask8) __M);
8762 }
8763
8764 extern __inline __mmask8
8765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8767 {
8768 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8769 (__v8di) __Y, 5,
8770 (__mmask8) -1);
8771 }
8772
8773 extern __inline __mmask8
8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8776 {
8777 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8778 (__v8di) __Y, 5,
8779 (__mmask8) __M);
8780 }
8781
8782 extern __inline __mmask8
8783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8785 {
8786 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8787 (__v8di) __Y, 5,
8788 (__mmask8) -1);
8789 }
8790
8791 extern __inline __mmask16
8792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8794 {
8795 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8796 (__v16si) __Y, 2,
8797 (__mmask16) __M);
8798 }
8799
8800 extern __inline __mmask16
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8803 {
8804 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8805 (__v16si) __Y, 2,
8806 (__mmask16) -1);
8807 }
8808
8809 extern __inline __mmask16
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8812 {
8813 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8814 (__v16si) __Y, 2,
8815 (__mmask16) __M);
8816 }
8817
8818 extern __inline __mmask16
8819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8821 {
8822 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8823 (__v16si) __Y, 2,
8824 (__mmask16) -1);
8825 }
8826
8827 extern __inline __mmask8
8828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8830 {
8831 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8832 (__v8di) __Y, 2,
8833 (__mmask8) __M);
8834 }
8835
8836 extern __inline __mmask8
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8839 {
8840 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8841 (__v8di) __Y, 2,
8842 (__mmask8) -1);
8843 }
8844
8845 extern __inline __mmask8
8846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8848 {
8849 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8850 (__v8di) __Y, 2,
8851 (__mmask8) __M);
8852 }
8853
8854 extern __inline __mmask8
8855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8857 {
8858 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8859 (__v8di) __Y, 2,
8860 (__mmask8) -1);
8861 }
8862
8863 extern __inline __mmask16
8864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8866 {
8867 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8868 (__v16si) __Y, 1,
8869 (__mmask16) __M);
8870 }
8871
8872 extern __inline __mmask16
8873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8875 {
8876 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8877 (__v16si) __Y, 1,
8878 (__mmask16) -1);
8879 }
8880
8881 extern __inline __mmask16
8882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8884 {
8885 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8886 (__v16si) __Y, 1,
8887 (__mmask16) __M);
8888 }
8889
8890 extern __inline __mmask16
8891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8893 {
8894 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8895 (__v16si) __Y, 1,
8896 (__mmask16) -1);
8897 }
8898
8899 extern __inline __mmask8
8900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8902 {
8903 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8904 (__v8di) __Y, 1,
8905 (__mmask8) __M);
8906 }
8907
8908 extern __inline __mmask8
8909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8911 {
8912 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8913 (__v8di) __Y, 1,
8914 (__mmask8) -1);
8915 }
8916
8917 extern __inline __mmask8
8918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8920 {
8921 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8922 (__v8di) __Y, 1,
8923 (__mmask8) __M);
8924 }
8925
8926 extern __inline __mmask8
8927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8929 {
8930 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8931 (__v8di) __Y, 1,
8932 (__mmask8) -1);
8933 }
8934
8935 extern __inline __mmask16
8936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8938 {
8939 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8940 (__v16si) __Y, 4,
8941 (__mmask16) -1);
8942 }
8943
8944 extern __inline __mmask16
8945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8947 {
8948 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8949 (__v16si) __Y, 4,
8950 (__mmask16) __M);
8951 }
8952
8953 extern __inline __mmask16
8954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8956 {
8957 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8958 (__v16si) __Y, 4,
8959 (__mmask16) __M);
8960 }
8961
8962 extern __inline __mmask16
8963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8964 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8965 {
8966 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8967 (__v16si) __Y, 4,
8968 (__mmask16) -1);
8969 }
8970
8971 extern __inline __mmask8
8972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8973 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8974 {
8975 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8976 (__v8di) __Y, 4,
8977 (__mmask8) __M);
8978 }
8979
8980 extern __inline __mmask8
8981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8982 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8983 {
8984 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8985 (__v8di) __Y, 4,
8986 (__mmask8) -1);
8987 }
8988
8989 extern __inline __mmask8
8990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8992 {
8993 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8994 (__v8di) __Y, 4,
8995 (__mmask8) __M);
8996 }
8997
8998 extern __inline __mmask8
8999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9001 {
9002 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9003 (__v8di) __Y, 4,
9004 (__mmask8) -1);
9005 }
9006
/* Predicate codes for the integer compare intrinsics.  The fixed
   comparison wrappers earlier in this header pass the same values as
   literals (1, 2, 4, 5).  NLT/GE and NLE/GT are two names for one
   code each.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
9016
9017 #ifdef __OPTIMIZE__
9018 extern __inline __mmask16
9019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9020 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9021 {
9022 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9023 (__mmask8) __B);
9024 }
9025
9026 extern __inline __mmask16
9027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9028 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9029 {
9030 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9031 (__mmask8) __B);
9032 }
9033
9034 extern __inline __mmask8
9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9037 {
9038 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9039 (__v8di) __Y, __P,
9040 (__mmask8) -1);
9041 }
9042
9043 extern __inline __mmask16
9044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9046 {
9047 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9048 (__v16si) __Y, __P,
9049 (__mmask16) -1);
9050 }
9051
9052 extern __inline __mmask8
9053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9054 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9055 {
9056 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9057 (__v8di) __Y, __P,
9058 (__mmask8) -1);
9059 }
9060
9061 extern __inline __mmask16
9062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9063 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9064 {
9065 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9066 (__v16si) __Y, __P,
9067 (__mmask16) -1);
9068 }
9069
9070 extern __inline __mmask8
9071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9073 const int __R)
9074 {
9075 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9076 (__v8df) __Y, __P,
9077 (__mmask8) -1, __R);
9078 }
9079
9080 extern __inline __mmask16
9081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9082 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9083 {
9084 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9085 (__v16sf) __Y, __P,
9086 (__mmask16) -1, __R);
9087 }
9088
9089 extern __inline __mmask8
9090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9091 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9092 const int __P)
9093 {
9094 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9095 (__v8di) __Y, __P,
9096 (__mmask8) __U);
9097 }
9098
9099 extern __inline __mmask16
9100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9101 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9102 const int __P)
9103 {
9104 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9105 (__v16si) __Y, __P,
9106 (__mmask16) __U);
9107 }
9108
9109 extern __inline __mmask8
9110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9111 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9112 const int __P)
9113 {
9114 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9115 (__v8di) __Y, __P,
9116 (__mmask8) __U);
9117 }
9118
9119 extern __inline __mmask16
9120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9122 const int __P)
9123 {
9124 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9125 (__v16si) __Y, __P,
9126 (__mmask16) __U);
9127 }
9128
9129 extern __inline __mmask8
9130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9131 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9132 const int __P, const int __R)
9133 {
9134 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9135 (__v8df) __Y, __P,
9136 (__mmask8) __U, __R);
9137 }
9138
9139 extern __inline __mmask16
9140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9141 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9142 const int __P, const int __R)
9143 {
9144 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9145 (__v16sf) __Y, __P,
9146 (__mmask16) __U, __R);
9147 }
9148
9149 extern __inline __mmask8
9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9152 {
9153 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9154 (__v2df) __Y, __P,
9155 (__mmask8) -1, __R);
9156 }
9157
9158 extern __inline __mmask8
9159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9160 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9161 const int __P, const int __R)
9162 {
9163 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9164 (__v2df) __Y, __P,
9165 (__mmask8) __M, __R);
9166 }
9167
9168 extern __inline __mmask8
9169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9170 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9171 {
9172 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9173 (__v4sf) __Y, __P,
9174 (__mmask8) -1, __R);
9175 }
9176
9177 extern __inline __mmask8
9178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9180 const int __P, const int __R)
9181 {
9182 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9183 (__v4sf) __Y, __P,
9184 (__mmask8) __M, __R);
9185 }
9186
9187 #else
/* Macro form of _kshiftli_mask16 used when __OPTIMIZE__ is not
   defined.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9190
/* Macro form of _kshiftri_mask16 used when __OPTIMIZE__ is not
   defined.  */
#define _kshiftri_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9193
/* Macro form of _mm512_cmp_epi64_mask used when __OPTIMIZE__ is not
   defined; all eight lanes are selected.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9198
/* Macro form of _mm512_cmp_epi32_mask used when __OPTIMIZE__ is not
   defined; all sixteen lanes are selected.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
9203
/* Macro form of _mm512_cmp_epu64_mask used when __OPTIMIZE__ is not
   defined; all eight lanes are selected.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9208
/* Macro form of _mm512_cmp_epu32_mask used when __OPTIMIZE__ is not
   defined; all sixteen lanes are selected.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
9213
/* Macro form of _mm512_cmp_round_pd_mask used when __OPTIMIZE__ is not
   defined; R is passed through as the rounding operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9218
/* Macro form of _mm512_cmp_round_ps_mask used when __OPTIMIZE__ is not
   defined; R is passed through as the rounding operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, \
     R))
9223
/* Macro form of _mm512_mask_cmp_epi64_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that an argument
   such as `m >> 8' is narrowed as a whole, as in the inline-function
   form; an unparenthesized cast would bind to the first operand only.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
                                           (__v8di)(__m512i)(Y), (int)(P), \
                                           (__mmask8)(M)))
9228
/* Macro form of _mm512_mask_cmp_epi32_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that an argument
   such as `m >> 16' is narrowed as a whole, as in the inline-function
   form; an unparenthesized cast would bind to the first operand only.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))
9233
/* Macro form of _mm512_mask_cmp_epu64_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that an argument
   such as `m >> 8' is narrowed as a whole, as in the inline-function
   form; an unparenthesized cast would bind to the first operand only.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), (int)(P), \
                                            (__mmask8)(M)))
9238
/* Macro form of _mm512_mask_cmp_epu32_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that an argument
   such as `m >> 16' is narrowed as a whole, as in the inline-function
   form; an unparenthesized cast would bind to the first operand only.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))
9243
/* Macro form of _mm512_mask_cmp_round_pd_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that a compound
   mask expression is narrowed as a whole, as in the inline-function
   form; R is parenthesized for the same hygiene reason.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), (R)))
9248
/* Macro form of _mm512_mask_cmp_round_ps_mask used when __OPTIMIZE__ is
   not defined.  M is parenthesized before the cast so that a compound
   mask expression is narrowed as a whole, as in the inline-function
   form; R is parenthesized for the same hygiene reason.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), (R)))
9253
/* Macro form of _mm_cmp_round_sd_mask used when __OPTIMIZE__ is not
   defined; the mask operand is all ones.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9258
/* Macro form of _mm_mask_cmp_round_sd_mask used when __OPTIMIZE__ is
   not defined; M is handed to the builtin without an explicit cast.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (M), \
     R))
9263
/* Macro form of _mm_cmp_round_ss_mask used when __OPTIMIZE__ is not
   defined; the mask operand is all ones.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9268
/* Macro form of _mm_mask_cmp_round_ss_mask used when __OPTIMIZE__ is
   not defined; M is handed to the builtin without an explicit cast.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (M), \
     R))
9273 #endif
9274
9275 #ifdef __OPTIMIZE__
9276 extern __inline __m512
9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9279 {
9280 __m512 __v1_old = _mm512_undefined_ps ();
9281 __mmask16 __mask = 0xFFFF;
9282
9283 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9284 __addr,
9285 (__v16si) __index,
9286 __mask, __scale);
9287 }
9288
9289 extern __inline __m512
9290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9291 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9292 __m512i __index, void const *__addr, int __scale)
9293 {
9294 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9295 __addr,
9296 (__v16si) __index,
9297 __mask, __scale);
9298 }
9299
9300 extern __inline __m512d
9301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9302 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9303 {
9304 __m512d __v1_old = _mm512_undefined_pd ();
9305 __mmask8 __mask = 0xFF;
9306
9307 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9308 __addr,
9309 (__v8si) __index, __mask,
9310 __scale);
9311 }
9312
9313 extern __inline __m512d
9314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9316 __m256i __index, void const *__addr, int __scale)
9317 {
9318 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9319 __addr,
9320 (__v8si) __index,
9321 __mask, __scale);
9322 }
9323
9324 extern __inline __m256
9325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9326 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9327 {
9328 __m256 __v1_old = _mm256_undefined_ps ();
9329 __mmask8 __mask = 0xFF;
9330
9331 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9332 __addr,
9333 (__v8di) __index, __mask,
9334 __scale);
9335 }
9336
9337 extern __inline __m256
9338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9340 __m512i __index, void const *__addr, int __scale)
9341 {
9342 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9343 __addr,
9344 (__v8di) __index,
9345 __mask, __scale);
9346 }
9347
9348 extern __inline __m512d
9349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9350 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9351 {
9352 __m512d __v1_old = _mm512_undefined_pd ();
9353 __mmask8 __mask = 0xFF;
9354
9355 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9356 __addr,
9357 (__v8di) __index, __mask,
9358 __scale);
9359 }
9360
9361 extern __inline __m512d
9362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9364 __m512i __index, void const *__addr, int __scale)
9365 {
9366 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9367 __addr,
9368 (__v8di) __index,
9369 __mask, __scale);
9370 }
9371
9372 extern __inline __m512i
9373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9374 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9375 {
9376 __m512i __v1_old = _mm512_undefined_epi32 ();
9377 __mmask16 __mask = 0xFFFF;
9378
9379 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9380 __addr,
9381 (__v16si) __index,
9382 __mask, __scale);
9383 }
9384
9385 extern __inline __m512i
9386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9387 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9388 __m512i __index, void const *__addr, int __scale)
9389 {
9390 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9391 __addr,
9392 (__v16si) __index,
9393 __mask, __scale);
9394 }
9395
9396 extern __inline __m512i
9397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9398 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9399 {
9400 __m512i __v1_old = _mm512_undefined_epi32 ();
9401 __mmask8 __mask = 0xFF;
9402
9403 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9404 __addr,
9405 (__v8si) __index, __mask,
9406 __scale);
9407 }
9408
9409 extern __inline __m512i
9410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9411 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9412 __m256i __index, void const *__addr,
9413 int __scale)
9414 {
9415 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9416 __addr,
9417 (__v8si) __index,
9418 __mask, __scale);
9419 }
9420
9421 extern __inline __m256i
9422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9424 {
9425 __m256i __v1_old = _mm256_undefined_si256 ();
9426 __mmask8 __mask = 0xFF;
9427
9428 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9429 __addr,
9430 (__v8di) __index,
9431 __mask, __scale);
9432 }
9433
9434 extern __inline __m256i
9435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9437 __m512i __index, void const *__addr, int __scale)
9438 {
9439 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9440 __addr,
9441 (__v8di) __index,
9442 __mask, __scale);
9443 }
9444
9445 extern __inline __m512i
9446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9447 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
9448 {
9449 __m512i __v1_old = _mm512_undefined_epi32 ();
9450 __mmask8 __mask = 0xFF;
9451
9452 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9453 __addr,
9454 (__v8di) __index, __mask,
9455 __scale);
9456 }
9457
9458 extern __inline __m512i
9459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9460 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9461 __m512i __index, void const *__addr,
9462 int __scale)
9463 {
9464 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9465 __addr,
9466 (__v8di) __index,
9467 __mask, __scale);
9468 }
9469
9470 extern __inline void
9471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9472 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
9473 {
9474 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9475 (__v16si) __index, (__v16sf) __v1, __scale);
9476 }
9477
9478 extern __inline void
9479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9480 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
9481 __m512i __index, __m512 __v1, int __scale)
9482 {
9483 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9484 (__v16sf) __v1, __scale);
9485 }
9486
9487 extern __inline void
9488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
9490 int __scale)
9491 {
9492 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9493 (__v8si) __index, (__v8df) __v1, __scale);
9494 }
9495
9496 extern __inline void
9497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9498 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
9499 __m256i __index, __m512d __v1, int __scale)
9500 {
9501 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9502 (__v8df) __v1, __scale);
9503 }
9504
9505 extern __inline void
9506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9507 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
9508 {
9509 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9510 (__v8di) __index, (__v8sf) __v1, __scale);
9511 }
9512
9513 extern __inline void
9514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9515 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
9516 __m512i __index, __m256 __v1, int __scale)
9517 {
9518 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9519 (__v8sf) __v1, __scale);
9520 }
9521
9522 extern __inline void
9523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
9525 int __scale)
9526 {
9527 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9528 (__v8di) __index, (__v8df) __v1, __scale);
9529 }
9530
9531 extern __inline void
9532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9533 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
9534 __m512i __index, __m512d __v1, int __scale)
9535 {
9536 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9537 (__v8df) __v1, __scale);
9538 }
9539
9540 extern __inline void
9541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9542 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
9543 __m512i __v1, int __scale)
9544 {
9545 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9546 (__v16si) __index, (__v16si) __v1, __scale);
9547 }
9548
9549 extern __inline void
9550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9551 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
9552 __m512i __index, __m512i __v1, int __scale)
9553 {
9554 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9555 (__v16si) __v1, __scale);
9556 }
9557
9558 extern __inline void
9559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9560 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
9561 __m512i __v1, int __scale)
9562 {
9563 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9564 (__v8si) __index, (__v8di) __v1, __scale);
9565 }
9566
9567 extern __inline void
9568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9569 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
9570 __m256i __index, __m512i __v1, int __scale)
9571 {
9572 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9573 (__v8di) __v1, __scale);
9574 }
9575
9576 extern __inline void
9577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9578 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
9579 __m256i __v1, int __scale)
9580 {
9581 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9582 (__v8di) __index, (__v8si) __v1, __scale);
9583 }
9584
9585 extern __inline void
9586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9587 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
9588 __m512i __index, __m256i __v1, int __scale)
9589 {
9590 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9591 (__v8si) __v1, __scale);
9592 }
9593
9594 extern __inline void
9595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9596 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
9597 __m512i __v1, int __scale)
9598 {
9599 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9600 (__v8di) __index, (__v8di) __v1, __scale);
9601 }
9602
9603 extern __inline void
9604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9605 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
9606 __m512i __index, __m512i __v1, int __scale)
9607 {
9608 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9609 (__v8di) __v1, __scale);
9610 }
9611 #else
/* Macro form of _mm512_i32gather_ps: expands to
   __builtin_ia32_gathersiv16sf with an undefined first operand and mask
   0xFFFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16)0xFFFF, (int) (SCALE)))
9617
/* Macro form of _mm512_mask_i32gather_ps: expands to
   __builtin_ia32_gathersiv16sf.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16) (MASK), (int) (SCALE)))
9623
/* Macro form of _mm512_i32gather_pd: expands to
   __builtin_ia32_gathersiv8df with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
9629
/* Macro form of _mm512_mask_i32gather_pd: expands to
   __builtin_ia32_gathersiv8df.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK), (int) (SCALE)))
9635
/* Macro form of _mm512_i64gather_ps: expands to
   __builtin_ia32_gatherdiv16sf with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
9641
/* Macro form of _mm512_mask_i64gather_ps: expands to
   __builtin_ia32_gatherdiv16sf.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),     \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK), (int) (SCALE)))
9647
/* Macro form of _mm512_i64gather_pd: expands to
   __builtin_ia32_gatherdiv8df with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
9653
/* Macro form of _mm512_mask_i64gather_pd: expands to
   __builtin_ia32_gatherdiv8df.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK), (int) (SCALE)))
9659
/* Macro form of _mm512_i32gather_epi32: expands to
   __builtin_ia32_gathersiv16si with an undefined first operand and mask
   0xFFFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
                                           (void const *) (ADDR),       \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16)0xFFFF, (int) (SCALE)))
9665
/* Macro form of _mm512_mask_i32gather_epi32: expands to
   __builtin_ia32_gathersiv16si.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),  \
                                           (void const *) (ADDR),       \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16) (MASK), (int) (SCALE)))
9671
/* Macro form of _mm512_i32gather_epi64: expands to
   __builtin_ia32_gathersiv8di with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
9677
/* Macro form of _mm512_mask_i32gather_epi64: expands to
   __builtin_ia32_gathersiv8di.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK), (int) (SCALE)))
9683
/* Macro form of _mm512_i64gather_epi32: expands to
   __builtin_ia32_gatherdiv16si with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
                                           (void const *) (ADDR),       \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8)0xFF, (int) (SCALE)))
9689
/* Macro form of _mm512_mask_i64gather_epi32: expands to
   __builtin_ia32_gatherdiv16si.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),   \
                                           (void const *) (ADDR),       \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8) (MASK), (int) (SCALE)))
9695
/* Macro form of _mm512_i64gather_epi64: expands to
   __builtin_ia32_gatherdiv8di with an undefined first operand and mask
   0xFF.  Each argument and the whole expansion are parenthesized so
   compound argument expressions are cast as a unit.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
9701
/* Macro form of _mm512_mask_i64gather_epi64: expands to
   __builtin_ia32_gatherdiv8di.  Each argument and the whole expansion
   are parenthesized so compound argument expressions are cast as a
   unit.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK), (int) (SCALE)))
9707
/* Macro form of _mm512_i32scatter_ps: expands to
   __builtin_ia32_scattersiv16sf with mask 0xFFFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9712
/* Macro form of _mm512_mask_i32scatter_ps: expands to
   __builtin_ia32_scattersiv16sf.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9717
/* Macro form of _mm512_i32scatter_pd: expands to
   __builtin_ia32_scattersiv8df with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9722
/* Macro form of _mm512_mask_i32scatter_pd: expands to
   __builtin_ia32_scattersiv8df.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9727
/* Macro form of _mm512_i64scatter_ps: expands to
   __builtin_ia32_scatterdiv16sf with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9732
/* Macro form of _mm512_mask_i64scatter_ps: expands to
   __builtin_ia32_scatterdiv16sf.  Arguments are parenthesized so
   compound expressions are cast as a unit.  The mask is cast to
   __mmask8, the same width the unmasked macro and the inline-function
   form of this intrinsic use for this builtin.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9737
/* Macro form of _mm512_i64scatter_pd: expands to
   __builtin_ia32_scatterdiv8df with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9742
/* Macro form of _mm512_mask_i64scatter_pd: expands to
   __builtin_ia32_scatterdiv8df.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9747
/* Macro form of _mm512_i32scatter_epi32: expands to
   __builtin_ia32_scattersiv16si with mask 0xFFFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9752
/* Macro form of _mm512_mask_i32scatter_epi32: expands to
   __builtin_ia32_scattersiv16si.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9757
/* Macro form of _mm512_i32scatter_epi64: expands to
   __builtin_ia32_scattersiv8di with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9762
/* Macro form of _mm512_mask_i32scatter_epi64: expands to
   __builtin_ia32_scattersiv8di.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9767
/* Macro form of _mm512_i64scatter_epi32: expands to
   __builtin_ia32_scatterdiv16si with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9772
/* Macro form of _mm512_mask_i64scatter_epi32: expands to
   __builtin_ia32_scatterdiv16si.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9777
/* Macro form of _mm512_i64scatter_epi64: expands to
   __builtin_ia32_scatterdiv8di with mask 0xFF.  Arguments are
   parenthesized so compound expressions are cast as a unit.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF,        \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9782
/* Macro form of _mm512_mask_i64scatter_epi64: expands to
   __builtin_ia32_scatterdiv8di.  Arguments are parenthesized so
   compound expressions are cast as a unit.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9787 #endif
9788
9789 extern __inline __m512d
9790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9791 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9792 {
9793 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9794 (__v8df) __W,
9795 (__mmask8) __U);
9796 }
9797
9798 extern __inline __m512d
9799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9800 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9801 {
9802 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9803 (__v8df)
9804 _mm512_setzero_pd (),
9805 (__mmask8) __U);
9806 }
9807
9808 extern __inline void
9809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9811 {
9812 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9813 (__mmask8) __U);
9814 }
9815
9816 extern __inline __m512
9817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9818 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9819 {
9820 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9821 (__v16sf) __W,
9822 (__mmask16) __U);
9823 }
9824
9825 extern __inline __m512
9826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9827 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9828 {
9829 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9830 (__v16sf)
9831 _mm512_setzero_ps (),
9832 (__mmask16) __U);
9833 }
9834
9835 extern __inline void
9836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9838 {
9839 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9840 (__mmask16) __U);
9841 }
9842
9843 extern __inline __m512i
9844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9846 {
9847 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9848 (__v8di) __W,
9849 (__mmask8) __U);
9850 }
9851
9852 extern __inline __m512i
9853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9854 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9855 {
9856 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9857 (__v8di)
9858 _mm512_setzero_si512 (),
9859 (__mmask8) __U);
9860 }
9861
9862 extern __inline void
9863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9864 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9865 {
9866 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9867 (__mmask8) __U);
9868 }
9869
9870 extern __inline __m512i
9871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9872 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9873 {
9874 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9875 (__v16si) __W,
9876 (__mmask16) __U);
9877 }
9878
9879 extern __inline __m512i
9880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9882 {
9883 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9884 (__v16si)
9885 _mm512_setzero_si512 (),
9886 (__mmask16) __U);
9887 }
9888
9889 extern __inline void
9890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9892 {
9893 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9894 (__mmask16) __U);
9895 }
9896
9897 extern __inline __m512d
9898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9899 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9900 {
9901 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9902 (__v8df) __W,
9903 (__mmask8) __U);
9904 }
9905
9906 extern __inline __m512d
9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9909 {
9910 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9911 (__v8df)
9912 _mm512_setzero_pd (),
9913 (__mmask8) __U);
9914 }
9915
9916 extern __inline __m512d
9917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9919 {
9920 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9921 (__v8df) __W,
9922 (__mmask8) __U);
9923 }
9924
9925 extern __inline __m512d
9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9928 {
9929 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9930 (__v8df)
9931 _mm512_setzero_pd (),
9932 (__mmask8) __U);
9933 }
9934
9935 extern __inline __m512
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9938 {
9939 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9940 (__v16sf) __W,
9941 (__mmask16) __U);
9942 }
9943
9944 extern __inline __m512
9945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9946 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9947 {
9948 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9949 (__v16sf)
9950 _mm512_setzero_ps (),
9951 (__mmask16) __U);
9952 }
9953
9954 extern __inline __m512
9955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9957 {
9958 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9959 (__v16sf) __W,
9960 (__mmask16) __U);
9961 }
9962
9963 extern __inline __m512
9964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9966 {
9967 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9968 (__v16sf)
9969 _mm512_setzero_ps (),
9970 (__mmask16) __U);
9971 }
9972
9973 extern __inline __m512i
9974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9976 {
9977 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9978 (__v8di) __W,
9979 (__mmask8) __U);
9980 }
9981
9982 extern __inline __m512i
9983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9984 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9985 {
9986 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9987 (__v8di)
9988 _mm512_setzero_si512 (),
9989 (__mmask8) __U);
9990 }
9991
9992 extern __inline __m512i
9993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9994 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9995 {
9996 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9997 (__v8di) __W,
9998 (__mmask8) __U);
9999 }
10000
10001 extern __inline __m512i
10002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10003 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10004 {
10005 return (__m512i)
10006 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10007 (__v8di)
10008 _mm512_setzero_si512 (),
10009 (__mmask8) __U);
10010 }
10011
10012 extern __inline __m512i
10013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10014 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10015 {
10016 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10017 (__v16si) __W,
10018 (__mmask16) __U);
10019 }
10020
10021 extern __inline __m512i
10022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10024 {
10025 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10026 (__v16si)
10027 _mm512_setzero_si512 (),
10028 (__mmask16) __U);
10029 }
10030
10031 extern __inline __m512i
10032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10033 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10034 {
10035 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10036 (__v16si) __W,
10037 (__mmask16) __U);
10038 }
10039
10040 extern __inline __m512i
10041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10042 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10043 {
10044 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10045 (__v16si)
10046 _mm512_setzero_si512
10047 (), (__mmask16) __U);
10048 }
10049
/* Mask arithmetic operations.  Each _k*_mask16 name is a plain macro
   alias for the _mm512_k* function of the same operation.  */

/* Two-operand aliases.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor

/* One-operand alias.  */
#define _knot_mask16   _mm512_knot
10057
10058 extern __inline unsigned char
10059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10060 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10061 {
10062 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10063 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10064 }
10065
10066 extern __inline unsigned char
10067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10069 {
10070 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10071 (__mmask16) __B);
10072 }
10073
10074 extern __inline unsigned char
10075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10077 {
10078 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10079 (__mmask16) __B);
10080 }
10081
10082 extern __inline unsigned int
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _cvtmask16_u32 (__mmask16 __A)
10085 {
10086 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10087 }
10088
10089 extern __inline __mmask16
10090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10091 _cvtu32_mask16 (unsigned int __A)
10092 {
10093 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10094 }
10095
10096 extern __inline __mmask16
10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098 _load_mask16 (__mmask16 *__A)
10099 {
10100 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10101 }
10102
10103 extern __inline void
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10106 {
10107 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10108 }
10109
10110 extern __inline __mmask16
10111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10112 _mm512_kand (__mmask16 __A, __mmask16 __B)
10113 {
10114 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10115 }
10116
10117 extern __inline __mmask16
10118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10119 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10120 {
10121 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10122 (__mmask16) __B);
10123 }
10124
10125 extern __inline __mmask16
10126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10127 _mm512_kor (__mmask16 __A, __mmask16 __B)
10128 {
10129 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10130 }
10131
10132 extern __inline int
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10135 {
10136 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10137 (__mmask16) __B);
10138 }
10139
10140 extern __inline int
10141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10143 {
10144 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10145 (__mmask16) __B);
10146 }
10147
10148 extern __inline __mmask16
10149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10150 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10151 {
10152 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10153 }
10154
10155 extern __inline __mmask16
10156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10157 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10158 {
10159 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10160 }
10161
10162 extern __inline __mmask16
10163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164 _mm512_knot (__mmask16 __A)
10165 {
10166 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10167 }
10168
10169 extern __inline __mmask16
10170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10172 {
10173 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10174 }
10175
10176 extern __inline __mmask16
10177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10179 {
10180 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10181 }
10182
10183 #ifdef __OPTIMIZE__
10184 extern __inline __m512i
10185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10187 const int __imm)
10188 {
10189 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10190 (__v4si) __D,
10191 __imm,
10192 (__v16si)
10193 _mm512_setzero_si512 (),
10194 __B);
10195 }
10196
10197 extern __inline __m512
10198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10199 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10200 const int __imm)
10201 {
10202 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10203 (__v4sf) __D,
10204 __imm,
10205 (__v16sf)
10206 _mm512_setzero_ps (), __B);
10207 }
10208
10209 extern __inline __m512i
10210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10211 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10212 __m128i __D, const int __imm)
10213 {
10214 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10215 (__v4si) __D,
10216 __imm,
10217 (__v16si) __A,
10218 __B);
10219 }
10220
10221 extern __inline __m512
10222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10224 __m128 __D, const int __imm)
10225 {
10226 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10227 (__v4sf) __D,
10228 __imm,
10229 (__v16sf) __A, __B);
10230 }
10231 #else
/* Macro form of _mm512_maskz_insertf32x4.  The mask A is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; an 8-bit cast would discard mask bits 8-15 of the 16-element
   vector.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))
10236
/* Macro form of _mm512_maskz_inserti32x4.  The mask A is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; an 8-bit cast would discard mask bits 8-15 of the 16-element
   vector.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10241
/* Macro form of _mm512_mask_insertf32x4.  The mask B is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; an 8-bit cast would discard mask bits 8-15 of the 16-element
   vector.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))
10246
/* Macro form of _mm512_mask_inserti32x4.  The mask B is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; an 8-bit cast would discard mask bits 8-15 of the 16-element
   vector.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
10251 #endif
10252
10253 extern __inline __m512i
10254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10255 _mm512_max_epi64 (__m512i __A, __m512i __B)
10256 {
10257 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10258 (__v8di) __B,
10259 (__v8di)
10260 _mm512_undefined_epi32 (),
10261 (__mmask8) -1);
10262 }
10263
10264 extern __inline __m512i
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10267 {
10268 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10269 (__v8di) __B,
10270 (__v8di)
10271 _mm512_setzero_si512 (),
10272 __M);
10273 }
10274
10275 extern __inline __m512i
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10278 {
10279 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10280 (__v8di) __B,
10281 (__v8di) __W, __M);
10282 }
10283
10284 extern __inline __m512i
10285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10286 _mm512_min_epi64 (__m512i __A, __m512i __B)
10287 {
10288 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10289 (__v8di) __B,
10290 (__v8di)
10291 _mm512_undefined_epi32 (),
10292 (__mmask8) -1);
10293 }
10294
10295 extern __inline __m512i
10296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10297 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10298 {
10299 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10300 (__v8di) __B,
10301 (__v8di) __W, __M);
10302 }
10303
10304 extern __inline __m512i
10305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10306 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10307 {
10308 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10309 (__v8di) __B,
10310 (__v8di)
10311 _mm512_setzero_si512 (),
10312 __M);
10313 }
10314
10315 extern __inline __m512i
10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317 _mm512_max_epu64 (__m512i __A, __m512i __B)
10318 {
10319 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10320 (__v8di) __B,
10321 (__v8di)
10322 _mm512_undefined_epi32 (),
10323 (__mmask8) -1);
10324 }
10325
10326 extern __inline __m512i
10327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10329 {
10330 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10331 (__v8di) __B,
10332 (__v8di)
10333 _mm512_setzero_si512 (),
10334 __M);
10335 }
10336
10337 extern __inline __m512i
10338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10340 {
10341 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10342 (__v8di) __B,
10343 (__v8di) __W, __M);
10344 }
10345
10346 extern __inline __m512i
10347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10348 _mm512_min_epu64 (__m512i __A, __m512i __B)
10349 {
10350 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10351 (__v8di) __B,
10352 (__v8di)
10353 _mm512_undefined_epi32 (),
10354 (__mmask8) -1);
10355 }
10356
10357 extern __inline __m512i
10358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10359 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10360 {
10361 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10362 (__v8di) __B,
10363 (__v8di) __W, __M);
10364 }
10365
10366 extern __inline __m512i
10367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10369 {
10370 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10371 (__v8di) __B,
10372 (__v8di)
10373 _mm512_setzero_si512 (),
10374 __M);
10375 }
10376
10377 extern __inline __m512i
10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10379 _mm512_max_epi32 (__m512i __A, __m512i __B)
10380 {
10381 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10382 (__v16si) __B,
10383 (__v16si)
10384 _mm512_undefined_epi32 (),
10385 (__mmask16) -1);
10386 }
10387
10388 extern __inline __m512i
10389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10391 {
10392 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10393 (__v16si) __B,
10394 (__v16si)
10395 _mm512_setzero_si512 (),
10396 __M);
10397 }
10398
10399 extern __inline __m512i
10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10402 {
10403 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10404 (__v16si) __B,
10405 (__v16si) __W, __M);
10406 }
10407
10408 extern __inline __m512i
10409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410 _mm512_min_epi32 (__m512i __A, __m512i __B)
10411 {
10412 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10413 (__v16si) __B,
10414 (__v16si)
10415 _mm512_undefined_epi32 (),
10416 (__mmask16) -1);
10417 }
10418
10419 extern __inline __m512i
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10422 {
10423 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10424 (__v16si) __B,
10425 (__v16si)
10426 _mm512_setzero_si512 (),
10427 __M);
10428 }
10429
10430 extern __inline __m512i
10431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10432 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10433 {
10434 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10435 (__v16si) __B,
10436 (__v16si) __W, __M);
10437 }
10438
10439 extern __inline __m512i
10440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441 _mm512_max_epu32 (__m512i __A, __m512i __B)
10442 {
10443 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10444 (__v16si) __B,
10445 (__v16si)
10446 _mm512_undefined_epi32 (),
10447 (__mmask16) -1);
10448 }
10449
10450 extern __inline __m512i
10451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10453 {
10454 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10455 (__v16si) __B,
10456 (__v16si)
10457 _mm512_setzero_si512 (),
10458 __M);
10459 }
10460
10461 extern __inline __m512i
10462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10463 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10464 {
10465 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10466 (__v16si) __B,
10467 (__v16si) __W, __M);
10468 }
10469
10470 extern __inline __m512i
10471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10472 _mm512_min_epu32 (__m512i __A, __m512i __B)
10473 {
10474 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10475 (__v16si) __B,
10476 (__v16si)
10477 _mm512_undefined_epi32 (),
10478 (__mmask16) -1);
10479 }
10480
10481 extern __inline __m512i
10482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10483 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10484 {
10485 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10486 (__v16si) __B,
10487 (__v16si)
10488 _mm512_setzero_si512 (),
10489 __M);
10490 }
10491
10492 extern __inline __m512i
10493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10494 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10495 {
10496 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10497 (__v16si) __B,
10498 (__v16si) __W, __M);
10499 }
10500
10501 extern __inline __m512
10502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10503 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10504 {
10505 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10506 (__v16sf) __B,
10507 (__v16sf)
10508 _mm512_undefined_ps (),
10509 (__mmask16) -1);
10510 }
10511
10512 extern __inline __m512
10513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10514 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10515 {
10516 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10517 (__v16sf) __B,
10518 (__v16sf) __W,
10519 (__mmask16) __U);
10520 }
10521
10522 extern __inline __m512
10523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10524 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10525 {
10526 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10527 (__v16sf) __B,
10528 (__v16sf)
10529 _mm512_setzero_ps (),
10530 (__mmask16) __U);
10531 }
10532
10533 #ifdef __OPTIMIZE__
10534 extern __inline __m128d
10535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10536 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10537 {
10538 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10539 (__v2df) __B,
10540 __R);
10541 }
10542
10543 extern __inline __m128
10544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10545 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10546 {
10547 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10548 (__v4sf) __B,
10549 __R);
10550 }
10551
10552 extern __inline __m128d
10553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10554 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10555 {
10556 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10557 (__v2df) __B,
10558 __R);
10559 }
10560
10561 extern __inline __m128
10562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10563 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10564 {
10565 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10566 (__v4sf) __B,
10567 __R);
10568 }
10569
10570 #else
/* Macro forms of the scalar max/min-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each one must expand to the same builtin
   as the inline function of the same name (maxsd/maxss/minsd/minss);
   the previous definitions expanded to the add/sub builtins instead, so
   unoptimized builds returned a sum or difference.  The expansion is
   fully parenthesized so the result cast covers the whole call.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
10582 #endif
10583
10584 extern __inline __m512d
10585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10586 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10587 {
10588 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10589 (__v8df) __W,
10590 (__mmask8) __U);
10591 }
10592
10593 extern __inline __m512
10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10596 {
10597 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10598 (__v16sf) __W,
10599 (__mmask16) __U);
10600 }
10601
10602 extern __inline __m512i
10603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10604 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10605 {
10606 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10607 (__v8di) __W,
10608 (__mmask8) __U);
10609 }
10610
10611 extern __inline __m512i
10612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10613 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10614 {
10615 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10616 (__v16si) __W,
10617 (__mmask16) __U);
10618 }
10619
10620 #ifdef __OPTIMIZE__
10621 extern __inline __m128d
10622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10623 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10624 {
10625 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10626 (__v2df) __A,
10627 (__v2df) __B,
10628 __R);
10629 }
10630
10631 extern __inline __m128
10632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10633 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10634 {
10635 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10636 (__v4sf) __A,
10637 (__v4sf) __B,
10638 __R);
10639 }
10640
10641 extern __inline __m128d
10642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10643 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10644 {
10645 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10646 (__v2df) __A,
10647 -(__v2df) __B,
10648 __R);
10649 }
10650
10651 extern __inline __m128
10652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10653 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10654 {
10655 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10656 (__v4sf) __A,
10657 -(__v4sf) __B,
10658 __R);
10659 }
10660
10661 extern __inline __m128d
10662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10663 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10664 {
10665 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10666 -(__v2df) __A,
10667 (__v2df) __B,
10668 __R);
10669 }
10670
10671 extern __inline __m128
10672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10673 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10674 {
10675 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10676 -(__v4sf) __A,
10677 (__v4sf) __B,
10678 __R);
10679 }
10680
10681 extern __inline __m128d
10682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10683 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10684 {
10685 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10686 -(__v2df) __A,
10687 -(__v2df) __B,
10688 __R);
10689 }
10690
10691 extern __inline __m128
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10694 {
10695 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10696 -(__v4sf) __A,
10697 -(__v4sf) __B,
10698 __R);
10699 }
10700 #else
/* Macro forms of the scalar fused multiply-add intrinsics, used when
   __OPTIMIZE__ is not defined.  All eight expand to
   __builtin_ia32_vfmadd{sd,ss}3_round; the fmsub, fnmadd and fnmsub
   variants negate the third, the second, or both of those vector
   operands, mirroring the inline definitions.  Each expansion is wrapped
   in parentheses so that the result cast applies to the whole call even
   when the macro invocation is followed by a postfix operator.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10724 #endif
10725
10726 #ifdef __OPTIMIZE__
10727 extern __inline int
10728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10730 {
10731 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10732 }
10733
10734 extern __inline int
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10737 {
10738 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10739 }
10740 #else
/* Macro forms used when __OPTIMIZE__ is not defined; the four arguments
   are forwarded in order to the vcomiss / vcomisd builtin.  */
#define _mm_comi_round_ss(A, B, P, R) \
  __builtin_ia32_vcomiss (A, B, P, R)
#define _mm_comi_round_sd(A, B, P, R) \
  __builtin_ia32_vcomisd (A, B, P, R)
10745 #endif
10746
10747 extern __inline __m512d
10748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10749 _mm512_sqrt_pd (__m512d __A)
10750 {
10751 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10752 (__v8df)
10753 _mm512_undefined_pd (),
10754 (__mmask8) -1,
10755 _MM_FROUND_CUR_DIRECTION);
10756 }
10757
10758 extern __inline __m512d
10759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10761 {
10762 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10763 (__v8df) __W,
10764 (__mmask8) __U,
10765 _MM_FROUND_CUR_DIRECTION);
10766 }
10767
10768 extern __inline __m512d
10769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10770 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10771 {
10772 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10773 (__v8df)
10774 _mm512_setzero_pd (),
10775 (__mmask8) __U,
10776 _MM_FROUND_CUR_DIRECTION);
10777 }
10778
10779 extern __inline __m512
10780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10781 _mm512_sqrt_ps (__m512 __A)
10782 {
10783 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10784 (__v16sf)
10785 _mm512_undefined_ps (),
10786 (__mmask16) -1,
10787 _MM_FROUND_CUR_DIRECTION);
10788 }
10789
10790 extern __inline __m512
10791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10792 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10793 {
10794 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10795 (__v16sf) __W,
10796 (__mmask16) __U,
10797 _MM_FROUND_CUR_DIRECTION);
10798 }
10799
10800 extern __inline __m512
10801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10802 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10803 {
10804 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10805 (__v16sf)
10806 _mm512_setzero_ps (),
10807 (__mmask16) __U,
10808 _MM_FROUND_CUR_DIRECTION);
10809 }
10810
10811 extern __inline __m512d
10812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813 _mm512_add_pd (__m512d __A, __m512d __B)
10814 {
10815 return (__m512d) ((__v8df)__A + (__v8df)__B);
10816 }
10817
10818 extern __inline __m512d
10819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10820 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10821 {
10822 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10823 (__v8df) __B,
10824 (__v8df) __W,
10825 (__mmask8) __U,
10826 _MM_FROUND_CUR_DIRECTION);
10827 }
10828
10829 extern __inline __m512d
10830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10831 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10832 {
10833 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10834 (__v8df) __B,
10835 (__v8df)
10836 _mm512_setzero_pd (),
10837 (__mmask8) __U,
10838 _MM_FROUND_CUR_DIRECTION);
10839 }
10840
10841 extern __inline __m512
10842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843 _mm512_add_ps (__m512 __A, __m512 __B)
10844 {
10845 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10846 }
10847
10848 extern __inline __m512
10849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10850 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10851 {
10852 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10853 (__v16sf) __B,
10854 (__v16sf) __W,
10855 (__mmask16) __U,
10856 _MM_FROUND_CUR_DIRECTION);
10857 }
10858
10859 extern __inline __m512
10860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10861 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10862 {
10863 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10864 (__v16sf) __B,
10865 (__v16sf)
10866 _mm512_setzero_ps (),
10867 (__mmask16) __U,
10868 _MM_FROUND_CUR_DIRECTION);
10869 }
10870
10871 extern __inline __m512d
10872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873 _mm512_sub_pd (__m512d __A, __m512d __B)
10874 {
10875 return (__m512d) ((__v8df)__A - (__v8df)__B);
10876 }
10877
10878 extern __inline __m512d
10879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10880 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10881 {
10882 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10883 (__v8df) __B,
10884 (__v8df) __W,
10885 (__mmask8) __U,
10886 _MM_FROUND_CUR_DIRECTION);
10887 }
10888
10889 extern __inline __m512d
10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10892 {
10893 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10894 (__v8df) __B,
10895 (__v8df)
10896 _mm512_setzero_pd (),
10897 (__mmask8) __U,
10898 _MM_FROUND_CUR_DIRECTION);
10899 }
10900
10901 extern __inline __m512
10902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903 _mm512_sub_ps (__m512 __A, __m512 __B)
10904 {
10905 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10906 }
10907
10908 extern __inline __m512
10909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10910 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10911 {
10912 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10913 (__v16sf) __B,
10914 (__v16sf) __W,
10915 (__mmask16) __U,
10916 _MM_FROUND_CUR_DIRECTION);
10917 }
10918
10919 extern __inline __m512
10920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10921 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10922 {
10923 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10924 (__v16sf) __B,
10925 (__v16sf)
10926 _mm512_setzero_ps (),
10927 (__mmask16) __U,
10928 _MM_FROUND_CUR_DIRECTION);
10929 }
10930
10931 extern __inline __m512d
10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933 _mm512_mul_pd (__m512d __A, __m512d __B)
10934 {
10935 return (__m512d) ((__v8df)__A * (__v8df)__B);
10936 }
10937
10938 extern __inline __m512d
10939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10940 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10941 {
10942 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10943 (__v8df) __B,
10944 (__v8df) __W,
10945 (__mmask8) __U,
10946 _MM_FROUND_CUR_DIRECTION);
10947 }
10948
10949 extern __inline __m512d
10950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10951 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10952 {
10953 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10954 (__v8df) __B,
10955 (__v8df)
10956 _mm512_setzero_pd (),
10957 (__mmask8) __U,
10958 _MM_FROUND_CUR_DIRECTION);
10959 }
10960
10961 extern __inline __m512
10962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963 _mm512_mul_ps (__m512 __A, __m512 __B)
10964 {
10965 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10966 }
10967
10968 extern __inline __m512
10969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10970 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10971 {
10972 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10973 (__v16sf) __B,
10974 (__v16sf) __W,
10975 (__mmask16) __U,
10976 _MM_FROUND_CUR_DIRECTION);
10977 }
10978
10979 extern __inline __m512
10980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10981 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10982 {
10983 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10984 (__v16sf) __B,
10985 (__v16sf)
10986 _mm512_setzero_ps (),
10987 (__mmask16) __U,
10988 _MM_FROUND_CUR_DIRECTION);
10989 }
10990
10991 extern __inline __m512d
10992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993 _mm512_div_pd (__m512d __M, __m512d __V)
10994 {
10995 return (__m512d) ((__v8df)__M / (__v8df)__V);
10996 }
10997
10998 extern __inline __m512d
10999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11001 {
11002 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11003 (__v8df) __V,
11004 (__v8df) __W,
11005 (__mmask8) __U,
11006 _MM_FROUND_CUR_DIRECTION);
11007 }
11008
11009 extern __inline __m512d
11010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11011 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11012 {
11013 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11014 (__v8df) __V,
11015 (__v8df)
11016 _mm512_setzero_pd (),
11017 (__mmask8) __U,
11018 _MM_FROUND_CUR_DIRECTION);
11019 }
11020
11021 extern __inline __m512
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm512_div_ps (__m512 __A, __m512 __B)
11024 {
11025 return (__m512) ((__v16sf)__A / (__v16sf)__B);
11026 }
11027
11028 extern __inline __m512
11029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11030 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11031 {
11032 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11033 (__v16sf) __B,
11034 (__v16sf) __W,
11035 (__mmask16) __U,
11036 _MM_FROUND_CUR_DIRECTION);
11037 }
11038
11039 extern __inline __m512
11040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11041 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11042 {
11043 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11044 (__v16sf) __B,
11045 (__v16sf)
11046 _mm512_setzero_ps (),
11047 (__mmask16) __U,
11048 _MM_FROUND_CUR_DIRECTION);
11049 }
11050
11051 extern __inline __m512d
11052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11053 _mm512_max_pd (__m512d __A, __m512d __B)
11054 {
11055 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11056 (__v8df) __B,
11057 (__v8df)
11058 _mm512_undefined_pd (),
11059 (__mmask8) -1,
11060 _MM_FROUND_CUR_DIRECTION);
11061 }
11062
11063 extern __inline __m512d
11064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11065 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11066 {
11067 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11068 (__v8df) __B,
11069 (__v8df) __W,
11070 (__mmask8) __U,
11071 _MM_FROUND_CUR_DIRECTION);
11072 }
11073
11074 extern __inline __m512d
11075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11076 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11077 {
11078 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11079 (__v8df) __B,
11080 (__v8df)
11081 _mm512_setzero_pd (),
11082 (__mmask8) __U,
11083 _MM_FROUND_CUR_DIRECTION);
11084 }
11085
11086 extern __inline __m512
11087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11088 _mm512_max_ps (__m512 __A, __m512 __B)
11089 {
11090 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11091 (__v16sf) __B,
11092 (__v16sf)
11093 _mm512_undefined_ps (),
11094 (__mmask16) -1,
11095 _MM_FROUND_CUR_DIRECTION);
11096 }
11097
11098 extern __inline __m512
11099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11100 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11101 {
11102 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11103 (__v16sf) __B,
11104 (__v16sf) __W,
11105 (__mmask16) __U,
11106 _MM_FROUND_CUR_DIRECTION);
11107 }
11108
11109 extern __inline __m512
11110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11111 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11112 {
11113 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11114 (__v16sf) __B,
11115 (__v16sf)
11116 _mm512_setzero_ps (),
11117 (__mmask16) __U,
11118 _MM_FROUND_CUR_DIRECTION);
11119 }
11120
11121 extern __inline __m512d
11122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11123 _mm512_min_pd (__m512d __A, __m512d __B)
11124 {
11125 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11126 (__v8df) __B,
11127 (__v8df)
11128 _mm512_undefined_pd (),
11129 (__mmask8) -1,
11130 _MM_FROUND_CUR_DIRECTION);
11131 }
11132
11133 extern __inline __m512d
11134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11135 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11136 {
11137 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11138 (__v8df) __B,
11139 (__v8df) __W,
11140 (__mmask8) __U,
11141 _MM_FROUND_CUR_DIRECTION);
11142 }
11143
11144 extern __inline __m512d
11145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11146 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11147 {
11148 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11149 (__v8df) __B,
11150 (__v8df)
11151 _mm512_setzero_pd (),
11152 (__mmask8) __U,
11153 _MM_FROUND_CUR_DIRECTION);
11154 }
11155
11156 extern __inline __m512
11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158 _mm512_min_ps (__m512 __A, __m512 __B)
11159 {
11160 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11161 (__v16sf) __B,
11162 (__v16sf)
11163 _mm512_undefined_ps (),
11164 (__mmask16) -1,
11165 _MM_FROUND_CUR_DIRECTION);
11166 }
11167
11168 extern __inline __m512
11169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11171 {
11172 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11173 (__v16sf) __B,
11174 (__v16sf) __W,
11175 (__mmask16) __U,
11176 _MM_FROUND_CUR_DIRECTION);
11177 }
11178
11179 extern __inline __m512
11180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11182 {
11183 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11184 (__v16sf) __B,
11185 (__v16sf)
11186 _mm512_setzero_ps (),
11187 (__mmask16) __U,
11188 _MM_FROUND_CUR_DIRECTION);
11189 }
11190
11191 extern __inline __m512d
11192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11193 _mm512_scalef_pd (__m512d __A, __m512d __B)
11194 {
11195 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11196 (__v8df) __B,
11197 (__v8df)
11198 _mm512_undefined_pd (),
11199 (__mmask8) -1,
11200 _MM_FROUND_CUR_DIRECTION);
11201 }
11202
11203 extern __inline __m512d
11204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11205 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11206 {
11207 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11208 (__v8df) __B,
11209 (__v8df) __W,
11210 (__mmask8) __U,
11211 _MM_FROUND_CUR_DIRECTION);
11212 }
11213
11214 extern __inline __m512d
11215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11216 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11217 {
11218 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11219 (__v8df) __B,
11220 (__v8df)
11221 _mm512_setzero_pd (),
11222 (__mmask8) __U,
11223 _MM_FROUND_CUR_DIRECTION);
11224 }
11225
11226 extern __inline __m512
11227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228 _mm512_scalef_ps (__m512 __A, __m512 __B)
11229 {
11230 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11231 (__v16sf) __B,
11232 (__v16sf)
11233 _mm512_undefined_ps (),
11234 (__mmask16) -1,
11235 _MM_FROUND_CUR_DIRECTION);
11236 }
11237
11238 extern __inline __m512
11239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11241 {
11242 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11243 (__v16sf) __B,
11244 (__v16sf) __W,
11245 (__mmask16) __U,
11246 _MM_FROUND_CUR_DIRECTION);
11247 }
11248
11249 extern __inline __m512
11250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11251 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11252 {
11253 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11254 (__v16sf) __B,
11255 (__v16sf)
11256 _mm512_setzero_ps (),
11257 (__mmask16) __U,
11258 _MM_FROUND_CUR_DIRECTION);
11259 }
11260
11261 extern __inline __m128d
11262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11263 _mm_scalef_sd (__m128d __A, __m128d __B)
11264 {
11265 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11266 (__v2df) __B,
11267 _MM_FROUND_CUR_DIRECTION);
11268 }
11269
11270 extern __inline __m128
11271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11272 _mm_scalef_ss (__m128 __A, __m128 __B)
11273 {
11274 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11275 (__v4sf) __B,
11276 _MM_FROUND_CUR_DIRECTION);
11277 }
11278
11279 extern __inline __m512d
11280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11282 {
11283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11284 (__v8df) __B,
11285 (__v8df) __C,
11286 (__mmask8) -1,
11287 _MM_FROUND_CUR_DIRECTION);
11288 }
11289
11290 extern __inline __m512d
11291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11293 {
11294 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11295 (__v8df) __B,
11296 (__v8df) __C,
11297 (__mmask8) __U,
11298 _MM_FROUND_CUR_DIRECTION);
11299 }
11300
11301 extern __inline __m512d
11302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11303 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11304 {
11305 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11306 (__v8df) __B,
11307 (__v8df) __C,
11308 (__mmask8) __U,
11309 _MM_FROUND_CUR_DIRECTION);
11310 }
11311
11312 extern __inline __m512d
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11315 {
11316 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11317 (__v8df) __B,
11318 (__v8df) __C,
11319 (__mmask8) __U,
11320 _MM_FROUND_CUR_DIRECTION);
11321 }
11322
11323 extern __inline __m512
11324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11325 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11326 {
11327 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11328 (__v16sf) __B,
11329 (__v16sf) __C,
11330 (__mmask16) -1,
11331 _MM_FROUND_CUR_DIRECTION);
11332 }
11333
11334 extern __inline __m512
11335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11337 {
11338 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11339 (__v16sf) __B,
11340 (__v16sf) __C,
11341 (__mmask16) __U,
11342 _MM_FROUND_CUR_DIRECTION);
11343 }
11344
11345 extern __inline __m512
11346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11348 {
11349 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11350 (__v16sf) __B,
11351 (__v16sf) __C,
11352 (__mmask16) __U,
11353 _MM_FROUND_CUR_DIRECTION);
11354 }
11355
11356 extern __inline __m512
11357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11359 {
11360 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11361 (__v16sf) __B,
11362 (__v16sf) __C,
11363 (__mmask16) __U,
11364 _MM_FROUND_CUR_DIRECTION);
11365 }
11366
11367 extern __inline __m512d
11368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11369 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11370 {
11371 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11372 (__v8df) __B,
11373 -(__v8df) __C,
11374 (__mmask8) -1,
11375 _MM_FROUND_CUR_DIRECTION);
11376 }
11377
11378 extern __inline __m512d
11379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11381 {
11382 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11383 (__v8df) __B,
11384 -(__v8df) __C,
11385 (__mmask8) __U,
11386 _MM_FROUND_CUR_DIRECTION);
11387 }
11388
11389 extern __inline __m512d
11390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11392 {
11393 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11394 (__v8df) __B,
11395 (__v8df) __C,
11396 (__mmask8) __U,
11397 _MM_FROUND_CUR_DIRECTION);
11398 }
11399
11400 extern __inline __m512d
11401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11403 {
11404 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11405 (__v8df) __B,
11406 -(__v8df) __C,
11407 (__mmask8) __U,
11408 _MM_FROUND_CUR_DIRECTION);
11409 }
11410
11411 extern __inline __m512
11412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11413 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11414 {
11415 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11416 (__v16sf) __B,
11417 -(__v16sf) __C,
11418 (__mmask16) -1,
11419 _MM_FROUND_CUR_DIRECTION);
11420 }
11421
11422 extern __inline __m512
11423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11424 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11425 {
11426 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11427 (__v16sf) __B,
11428 -(__v16sf) __C,
11429 (__mmask16) __U,
11430 _MM_FROUND_CUR_DIRECTION);
11431 }
11432
11433 extern __inline __m512
11434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11435 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11436 {
11437 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11438 (__v16sf) __B,
11439 (__v16sf) __C,
11440 (__mmask16) __U,
11441 _MM_FROUND_CUR_DIRECTION);
11442 }
11443
11444 extern __inline __m512
11445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11446 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11447 {
11448 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11449 (__v16sf) __B,
11450 -(__v16sf) __C,
11451 (__mmask16) __U,
11452 _MM_FROUND_CUR_DIRECTION);
11453 }
11454
11455 extern __inline __m512d
11456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11458 {
11459 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11460 (__v8df) __B,
11461 (__v8df) __C,
11462 (__mmask8) -1,
11463 _MM_FROUND_CUR_DIRECTION);
11464 }
11465
11466 extern __inline __m512d
11467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11468 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11469 {
11470 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11471 (__v8df) __B,
11472 (__v8df) __C,
11473 (__mmask8) __U,
11474 _MM_FROUND_CUR_DIRECTION);
11475 }
11476
11477 extern __inline __m512d
11478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11479 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11480 {
11481 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11482 (__v8df) __B,
11483 (__v8df) __C,
11484 (__mmask8) __U,
11485 _MM_FROUND_CUR_DIRECTION);
11486 }
11487
11488 extern __inline __m512d
11489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11490 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11491 {
11492 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11493 (__v8df) __B,
11494 (__v8df) __C,
11495 (__mmask8) __U,
11496 _MM_FROUND_CUR_DIRECTION);
11497 }
11498
11499 extern __inline __m512
11500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11501 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11502 {
11503 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11504 (__v16sf) __B,
11505 (__v16sf) __C,
11506 (__mmask16) -1,
11507 _MM_FROUND_CUR_DIRECTION);
11508 }
11509
11510 extern __inline __m512
11511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11513 {
11514 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11515 (__v16sf) __B,
11516 (__v16sf) __C,
11517 (__mmask16) __U,
11518 _MM_FROUND_CUR_DIRECTION);
11519 }
11520
11521 extern __inline __m512
11522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11523 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11524 {
11525 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11526 (__v16sf) __B,
11527 (__v16sf) __C,
11528 (__mmask16) __U,
11529 _MM_FROUND_CUR_DIRECTION);
11530 }
11531
11532 extern __inline __m512
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11535 {
11536 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11537 (__v16sf) __B,
11538 (__v16sf) __C,
11539 (__mmask16) __U,
11540 _MM_FROUND_CUR_DIRECTION);
11541 }
11542
11543 extern __inline __m512d
11544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11546 {
11547 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11548 (__v8df) __B,
11549 -(__v8df) __C,
11550 (__mmask8) -1,
11551 _MM_FROUND_CUR_DIRECTION);
11552 }
11553
11554 extern __inline __m512d
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11557 {
11558 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11559 (__v8df) __B,
11560 -(__v8df) __C,
11561 (__mmask8) __U,
11562 _MM_FROUND_CUR_DIRECTION);
11563 }
11564
11565 extern __inline __m512d
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11568 {
11569 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11570 (__v8df) __B,
11571 (__v8df) __C,
11572 (__mmask8) __U,
11573 _MM_FROUND_CUR_DIRECTION);
11574 }
11575
11576 extern __inline __m512d
11577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11579 {
11580 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11581 (__v8df) __B,
11582 -(__v8df) __C,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11585 }
11586
11587 extern __inline __m512
11588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11590 {
11591 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11592 (__v16sf) __B,
11593 -(__v16sf) __C,
11594 (__mmask16) -1,
11595 _MM_FROUND_CUR_DIRECTION);
11596 }
11597
11598 extern __inline __m512
11599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11601 {
11602 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11603 (__v16sf) __B,
11604 -(__v16sf) __C,
11605 (__mmask16) __U,
11606 _MM_FROUND_CUR_DIRECTION);
11607 }
11608
11609 extern __inline __m512
11610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11611 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11612 {
11613 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11614 (__v16sf) __B,
11615 (__v16sf) __C,
11616 (__mmask16) __U,
11617 _MM_FROUND_CUR_DIRECTION);
11618 }
11619
11620 extern __inline __m512
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11623 {
11624 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11625 (__v16sf) __B,
11626 -(__v16sf) __C,
11627 (__mmask16) __U,
11628 _MM_FROUND_CUR_DIRECTION);
11629 }
11630
11631 extern __inline __m512d
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11634 {
11635 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11636 (__v8df) __B,
11637 (__v8df) __C,
11638 (__mmask8) -1,
11639 _MM_FROUND_CUR_DIRECTION);
11640 }
11641
11642 extern __inline __m512d
11643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11644 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11645 {
11646 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11647 (__v8df) __B,
11648 (__v8df) __C,
11649 (__mmask8) __U,
11650 _MM_FROUND_CUR_DIRECTION);
11651 }
11652
11653 extern __inline __m512d
11654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11655 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11656 {
11657 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11658 (__v8df) __B,
11659 (__v8df) __C,
11660 (__mmask8) __U,
11661 _MM_FROUND_CUR_DIRECTION);
11662 }
11663
11664 extern __inline __m512d
11665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11666 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11667 {
11668 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11669 (__v8df) __B,
11670 (__v8df) __C,
11671 (__mmask8) __U,
11672 _MM_FROUND_CUR_DIRECTION);
11673 }
11674
11675 extern __inline __m512
11676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11677 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11678 {
11679 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11680 (__v16sf) __B,
11681 (__v16sf) __C,
11682 (__mmask16) -1,
11683 _MM_FROUND_CUR_DIRECTION);
11684 }
11685
11686 extern __inline __m512
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11689 {
11690 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11691 (__v16sf) __B,
11692 (__v16sf) __C,
11693 (__mmask16) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11695 }
11696
11697 extern __inline __m512
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11700 {
11701 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11702 (__v16sf) __B,
11703 (__v16sf) __C,
11704 (__mmask16) __U,
11705 _MM_FROUND_CUR_DIRECTION);
11706 }
11707
11708 extern __inline __m512
11709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11711 {
11712 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11713 (__v16sf) __B,
11714 (__v16sf) __C,
11715 (__mmask16) __U,
11716 _MM_FROUND_CUR_DIRECTION);
11717 }
11718
11719 extern __inline __m512d
11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11722 {
11723 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11724 (__v8df) __B,
11725 -(__v8df) __C,
11726 (__mmask8) -1,
11727 _MM_FROUND_CUR_DIRECTION);
11728 }
11729
11730 extern __inline __m512d
11731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11733 {
11734 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11735 (__v8df) __B,
11736 (__v8df) __C,
11737 (__mmask8) __U,
11738 _MM_FROUND_CUR_DIRECTION);
11739 }
11740
11741 extern __inline __m512d
11742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11744 {
11745 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11746 (__v8df) __B,
11747 (__v8df) __C,
11748 (__mmask8) __U,
11749 _MM_FROUND_CUR_DIRECTION);
11750 }
11751
11752 extern __inline __m512d
11753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11754 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11755 {
11756 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11757 (__v8df) __B,
11758 -(__v8df) __C,
11759 (__mmask8) __U,
11760 _MM_FROUND_CUR_DIRECTION);
11761 }
11762
11763 extern __inline __m512
11764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11765 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11766 {
11767 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11768 (__v16sf) __B,
11769 -(__v16sf) __C,
11770 (__mmask16) -1,
11771 _MM_FROUND_CUR_DIRECTION);
11772 }
11773
11774 extern __inline __m512
11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11777 {
11778 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11779 (__v16sf) __B,
11780 (__v16sf) __C,
11781 (__mmask16) __U,
11782 _MM_FROUND_CUR_DIRECTION);
11783 }
11784
11785 extern __inline __m512
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11788 {
11789 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11790 (__v16sf) __B,
11791 (__v16sf) __C,
11792 (__mmask16) __U,
11793 _MM_FROUND_CUR_DIRECTION);
11794 }
11795
11796 extern __inline __m512
11797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11798 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11799 {
11800 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11801 (__v16sf) __B,
11802 -(__v16sf) __C,
11803 (__mmask16) __U,
11804 _MM_FROUND_CUR_DIRECTION);
11805 }
11806
11807 extern __inline __m256i
11808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11809 _mm512_cvttpd_epi32 (__m512d __A)
11810 {
11811 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11812 (__v8si)
11813 _mm256_undefined_si256 (),
11814 (__mmask8) -1,
11815 _MM_FROUND_CUR_DIRECTION);
11816 }
11817
11818 extern __inline __m256i
11819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11820 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11821 {
11822 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11823 (__v8si) __W,
11824 (__mmask8) __U,
11825 _MM_FROUND_CUR_DIRECTION);
11826 }
11827
11828 extern __inline __m256i
11829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11830 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11831 {
11832 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11833 (__v8si)
11834 _mm256_setzero_si256 (),
11835 (__mmask8) __U,
11836 _MM_FROUND_CUR_DIRECTION);
11837 }
11838
11839 extern __inline __m256i
11840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841 _mm512_cvttpd_epu32 (__m512d __A)
11842 {
11843 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11844 (__v8si)
11845 _mm256_undefined_si256 (),
11846 (__mmask8) -1,
11847 _MM_FROUND_CUR_DIRECTION);
11848 }
11849
11850 extern __inline __m256i
11851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11852 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11853 {
11854 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11855 (__v8si) __W,
11856 (__mmask8) __U,
11857 _MM_FROUND_CUR_DIRECTION);
11858 }
11859
11860 extern __inline __m256i
11861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11862 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11863 {
11864 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11865 (__v8si)
11866 _mm256_setzero_si256 (),
11867 (__mmask8) __U,
11868 _MM_FROUND_CUR_DIRECTION);
11869 }
11870
11871 extern __inline __m256i
11872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11873 _mm512_cvtpd_epi32 (__m512d __A)
11874 {
11875 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11876 (__v8si)
11877 _mm256_undefined_si256 (),
11878 (__mmask8) -1,
11879 _MM_FROUND_CUR_DIRECTION);
11880 }
11881
11882 extern __inline __m256i
11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11885 {
11886 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11887 (__v8si) __W,
11888 (__mmask8) __U,
11889 _MM_FROUND_CUR_DIRECTION);
11890 }
11891
11892 extern __inline __m256i
11893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11894 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11895 {
11896 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11897 (__v8si)
11898 _mm256_setzero_si256 (),
11899 (__mmask8) __U,
11900 _MM_FROUND_CUR_DIRECTION);
11901 }
11902
11903 extern __inline __m256i
11904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11905 _mm512_cvtpd_epu32 (__m512d __A)
11906 {
11907 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11908 (__v8si)
11909 _mm256_undefined_si256 (),
11910 (__mmask8) -1,
11911 _MM_FROUND_CUR_DIRECTION);
11912 }
11913
11914 extern __inline __m256i
11915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11917 {
11918 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11919 (__v8si) __W,
11920 (__mmask8) __U,
11921 _MM_FROUND_CUR_DIRECTION);
11922 }
11923
11924 extern __inline __m256i
11925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11927 {
11928 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11929 (__v8si)
11930 _mm256_setzero_si256 (),
11931 (__mmask8) __U,
11932 _MM_FROUND_CUR_DIRECTION);
11933 }
11934
11935 extern __inline __m512i
11936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11937 _mm512_cvttps_epi32 (__m512 __A)
11938 {
11939 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11940 (__v16si)
11941 _mm512_undefined_epi32 (),
11942 (__mmask16) -1,
11943 _MM_FROUND_CUR_DIRECTION);
11944 }
11945
11946 extern __inline __m512i
11947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11949 {
11950 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11951 (__v16si) __W,
11952 (__mmask16) __U,
11953 _MM_FROUND_CUR_DIRECTION);
11954 }
11955
11956 extern __inline __m512i
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11959 {
11960 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11961 (__v16si)
11962 _mm512_setzero_si512 (),
11963 (__mmask16) __U,
11964 _MM_FROUND_CUR_DIRECTION);
11965 }
11966
11967 extern __inline __m512i
11968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969 _mm512_cvttps_epu32 (__m512 __A)
11970 {
11971 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11972 (__v16si)
11973 _mm512_undefined_epi32 (),
11974 (__mmask16) -1,
11975 _MM_FROUND_CUR_DIRECTION);
11976 }
11977
11978 extern __inline __m512i
11979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11980 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11981 {
11982 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11983 (__v16si) __W,
11984 (__mmask16) __U,
11985 _MM_FROUND_CUR_DIRECTION);
11986 }
11987
11988 extern __inline __m512i
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11991 {
11992 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11993 (__v16si)
11994 _mm512_setzero_si512 (),
11995 (__mmask16) __U,
11996 _MM_FROUND_CUR_DIRECTION);
11997 }
11998
11999 extern __inline __m512i
12000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001 _mm512_cvtps_epi32 (__m512 __A)
12002 {
12003 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12004 (__v16si)
12005 _mm512_undefined_epi32 (),
12006 (__mmask16) -1,
12007 _MM_FROUND_CUR_DIRECTION);
12008 }
12009
12010 extern __inline __m512i
12011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12013 {
12014 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12015 (__v16si) __W,
12016 (__mmask16) __U,
12017 _MM_FROUND_CUR_DIRECTION);
12018 }
12019
12020 extern __inline __m512i
12021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12023 {
12024 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12025 (__v16si)
12026 _mm512_setzero_si512 (),
12027 (__mmask16) __U,
12028 _MM_FROUND_CUR_DIRECTION);
12029 }
12030
12031 extern __inline __m512i
12032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12033 _mm512_cvtps_epu32 (__m512 __A)
12034 {
12035 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12036 (__v16si)
12037 _mm512_undefined_epi32 (),
12038 (__mmask16) -1,
12039 _MM_FROUND_CUR_DIRECTION);
12040 }
12041
12042 extern __inline __m512i
12043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12045 {
12046 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12047 (__v16si) __W,
12048 (__mmask16) __U,
12049 _MM_FROUND_CUR_DIRECTION);
12050 }
12051
12052 extern __inline __m512i
12053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12055 {
12056 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12057 (__v16si)
12058 _mm512_setzero_si512 (),
12059 (__mmask16) __U,
12060 _MM_FROUND_CUR_DIRECTION);
12061 }
12062
12063 extern __inline double
12064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12065 _mm512_cvtsd_f64 (__m512d __A)
12066 {
12067 return __A[0];
12068 }
12069
12070 extern __inline float
12071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072 _mm512_cvtss_f32 (__m512 __A)
12073 {
12074 return __A[0];
12075 }
12076
12077 #ifdef __x86_64__
12078 extern __inline __m128
12079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12081 {
12082 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12083 _MM_FROUND_CUR_DIRECTION);
12084 }
12085
12086 extern __inline __m128d
12087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12088 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12089 {
12090 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12091 _MM_FROUND_CUR_DIRECTION);
12092 }
12093 #endif
12094
12095 extern __inline __m128
12096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12097 _mm_cvtu32_ss (__m128 __A, unsigned __B)
12098 {
12099 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12100 _MM_FROUND_CUR_DIRECTION);
12101 }
12102
12103 extern __inline __m512
12104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12105 _mm512_cvtepi32_ps (__m512i __A)
12106 {
12107 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12108 (__v16sf)
12109 _mm512_undefined_ps (),
12110 (__mmask16) -1,
12111 _MM_FROUND_CUR_DIRECTION);
12112 }
12113
12114 extern __inline __m512
12115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12117 {
12118 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12119 (__v16sf) __W,
12120 (__mmask16) __U,
12121 _MM_FROUND_CUR_DIRECTION);
12122 }
12123
12124 extern __inline __m512
12125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12126 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12127 {
12128 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12129 (__v16sf)
12130 _mm512_setzero_ps (),
12131 (__mmask16) __U,
12132 _MM_FROUND_CUR_DIRECTION);
12133 }
12134
12135 extern __inline __m512
12136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12137 _mm512_cvtepu32_ps (__m512i __A)
12138 {
12139 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12140 (__v16sf)
12141 _mm512_undefined_ps (),
12142 (__mmask16) -1,
12143 _MM_FROUND_CUR_DIRECTION);
12144 }
12145
12146 extern __inline __m512
12147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12148 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12149 {
12150 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12151 (__v16sf) __W,
12152 (__mmask16) __U,
12153 _MM_FROUND_CUR_DIRECTION);
12154 }
12155
12156 extern __inline __m512
12157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12158 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12159 {
12160 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12161 (__v16sf)
12162 _mm512_setzero_ps (),
12163 (__mmask16) __U,
12164 _MM_FROUND_CUR_DIRECTION);
12165 }
12166
12167 #ifdef __OPTIMIZE__
12168 extern __inline __m512d
12169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12170 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12171 {
12172 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12173 (__v8df) __B,
12174 (__v8di) __C,
12175 __imm,
12176 (__mmask8) -1,
12177 _MM_FROUND_CUR_DIRECTION);
12178 }
12179
12180 extern __inline __m512d
12181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12182 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12183 __m512i __C, const int __imm)
12184 {
12185 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12186 (__v8df) __B,
12187 (__v8di) __C,
12188 __imm,
12189 (__mmask8) __U,
12190 _MM_FROUND_CUR_DIRECTION);
12191 }
12192
12193 extern __inline __m512d
12194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12195 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12196 __m512i __C, const int __imm)
12197 {
12198 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12199 (__v8df) __B,
12200 (__v8di) __C,
12201 __imm,
12202 (__mmask8) __U,
12203 _MM_FROUND_CUR_DIRECTION);
12204 }
12205
12206 extern __inline __m512
12207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12209 {
12210 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12211 (__v16sf) __B,
12212 (__v16si) __C,
12213 __imm,
12214 (__mmask16) -1,
12215 _MM_FROUND_CUR_DIRECTION);
12216 }
12217
12218 extern __inline __m512
12219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12220 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12221 __m512i __C, const int __imm)
12222 {
12223 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12224 (__v16sf) __B,
12225 (__v16si) __C,
12226 __imm,
12227 (__mmask16) __U,
12228 _MM_FROUND_CUR_DIRECTION);
12229 }
12230
12231 extern __inline __m512
12232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12233 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12234 __m512i __C, const int __imm)
12235 {
12236 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12237 (__v16sf) __B,
12238 (__v16si) __C,
12239 __imm,
12240 (__mmask16) __U,
12241 _MM_FROUND_CUR_DIRECTION);
12242 }
12243
12244 extern __inline __m128d
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12247 {
12248 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12249 (__v2df) __B,
12250 (__v2di) __C, __imm,
12251 (__mmask8) -1,
12252 _MM_FROUND_CUR_DIRECTION);
12253 }
12254
12255 extern __inline __m128d
12256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12257 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12258 __m128i __C, const int __imm)
12259 {
12260 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12261 (__v2df) __B,
12262 (__v2di) __C, __imm,
12263 (__mmask8) __U,
12264 _MM_FROUND_CUR_DIRECTION);
12265 }
12266
12267 extern __inline __m128d
12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12270 __m128i __C, const int __imm)
12271 {
12272 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12273 (__v2df) __B,
12274 (__v2di) __C,
12275 __imm,
12276 (__mmask8) __U,
12277 _MM_FROUND_CUR_DIRECTION);
12278 }
12279
12280 extern __inline __m128
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12283 {
12284 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12285 (__v4sf) __B,
12286 (__v4si) __C, __imm,
12287 (__mmask8) -1,
12288 _MM_FROUND_CUR_DIRECTION);
12289 }
12290
12291 extern __inline __m128
12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12294 __m128i __C, const int __imm)
12295 {
12296 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12297 (__v4sf) __B,
12298 (__v4si) __C, __imm,
12299 (__mmask8) __U,
12300 _MM_FROUND_CUR_DIRECTION);
12301 }
12302
12303 extern __inline __m128
12304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12305 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12306 __m128i __C, const int __imm)
12307 {
12308 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12309 (__v4sf) __B,
12310 (__v4si) __C, __imm,
12311 (__mmask8) __U,
12312 _MM_FROUND_CUR_DIRECTION);
12313 }
12314 #else
/* Macro form of _mm512_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  C is cast to int; all mask bits are set.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12319
/* Macro form of _mm512_mask_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  C is cast to int and U to __mmask8.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12324
/* Macro form of _mm512_maskz_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  C is cast to int and U to __mmask8.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12329
/* Macro form of _mm512_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  C is cast to int; all mask bits are set.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12334
/* Macro form of _mm512_mask_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  C is cast to int and U to __mmask16.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12339
/* Macro form of _mm512_maskz_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  C is cast to int and U to __mmask16.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12344
/* Macro forms of the scalar fixupimm intrinsics (the #else arm of the
   enclosing preprocessor conditional).  Each one casts its operands and
   expands to the matching fixupimm builtin with _MM_FROUND_CUR_DIRECTION;
   the unmasked forms supply an all-ones mask.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12374 #endif
12375
12376 #ifdef __x86_64__
12377 extern __inline unsigned long long
12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379 _mm_cvtss_u64 (__m128 __A)
12380 {
12381 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12382 __A,
12383 _MM_FROUND_CUR_DIRECTION);
12384 }
12385
12386 extern __inline unsigned long long
12387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388 _mm_cvttss_u64 (__m128 __A)
12389 {
12390 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12391 __A,
12392 _MM_FROUND_CUR_DIRECTION);
12393 }
12394
12395 extern __inline long long
12396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12397 _mm_cvttss_i64 (__m128 __A)
12398 {
12399 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12400 _MM_FROUND_CUR_DIRECTION);
12401 }
12402 #endif /* __x86_64__ */
12403
12404 extern __inline unsigned
12405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406 _mm_cvtss_u32 (__m128 __A)
12407 {
12408 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12409 _MM_FROUND_CUR_DIRECTION);
12410 }
12411
12412 extern __inline unsigned
12413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12414 _mm_cvttss_u32 (__m128 __A)
12415 {
12416 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12417 _MM_FROUND_CUR_DIRECTION);
12418 }
12419
12420 extern __inline int
12421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12422 _mm_cvttss_i32 (__m128 __A)
12423 {
12424 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12425 _MM_FROUND_CUR_DIRECTION);
12426 }
12427
12428 #ifdef __x86_64__
12429 extern __inline unsigned long long
12430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12431 _mm_cvtsd_u64 (__m128d __A)
12432 {
12433 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12434 __A,
12435 _MM_FROUND_CUR_DIRECTION);
12436 }
12437
12438 extern __inline unsigned long long
12439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12440 _mm_cvttsd_u64 (__m128d __A)
12441 {
12442 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12443 __A,
12444 _MM_FROUND_CUR_DIRECTION);
12445 }
12446
12447 extern __inline long long
12448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12449 _mm_cvttsd_i64 (__m128d __A)
12450 {
12451 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12452 _MM_FROUND_CUR_DIRECTION);
12453 }
12454 #endif /* __x86_64__ */
12455
12456 extern __inline unsigned
12457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12458 _mm_cvtsd_u32 (__m128d __A)
12459 {
12460 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12461 _MM_FROUND_CUR_DIRECTION);
12462 }
12463
12464 extern __inline unsigned
12465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12466 _mm_cvttsd_u32 (__m128d __A)
12467 {
12468 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12469 _MM_FROUND_CUR_DIRECTION);
12470 }
12471
12472 extern __inline int
12473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474 _mm_cvttsd_i32 (__m128d __A)
12475 {
12476 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12477 _MM_FROUND_CUR_DIRECTION);
12478 }
12479
12480 extern __inline __m512d
12481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12482 _mm512_cvtps_pd (__m256 __A)
12483 {
12484 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12485 (__v8df)
12486 _mm512_undefined_pd (),
12487 (__mmask8) -1,
12488 _MM_FROUND_CUR_DIRECTION);
12489 }
12490
12491 extern __inline __m512d
12492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12493 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12494 {
12495 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12496 (__v8df) __W,
12497 (__mmask8) __U,
12498 _MM_FROUND_CUR_DIRECTION);
12499 }
12500
12501 extern __inline __m512d
12502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12503 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12504 {
12505 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12506 (__v8df)
12507 _mm512_setzero_pd (),
12508 (__mmask8) __U,
12509 _MM_FROUND_CUR_DIRECTION);
12510 }
12511
12512 extern __inline __m512
12513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514 _mm512_cvtph_ps (__m256i __A)
12515 {
12516 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12517 (__v16sf)
12518 _mm512_undefined_ps (),
12519 (__mmask16) -1,
12520 _MM_FROUND_CUR_DIRECTION);
12521 }
12522
12523 extern __inline __m512
12524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12526 {
12527 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12528 (__v16sf) __W,
12529 (__mmask16) __U,
12530 _MM_FROUND_CUR_DIRECTION);
12531 }
12532
12533 extern __inline __m512
12534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12535 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12536 {
12537 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12538 (__v16sf)
12539 _mm512_setzero_ps (),
12540 (__mmask16) __U,
12541 _MM_FROUND_CUR_DIRECTION);
12542 }
12543
12544 extern __inline __m256
12545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12546 _mm512_cvtpd_ps (__m512d __A)
12547 {
12548 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12549 (__v8sf)
12550 _mm256_undefined_ps (),
12551 (__mmask8) -1,
12552 _MM_FROUND_CUR_DIRECTION);
12553 }
12554
12555 extern __inline __m256
12556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12557 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12558 {
12559 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12560 (__v8sf) __W,
12561 (__mmask8) __U,
12562 _MM_FROUND_CUR_DIRECTION);
12563 }
12564
12565 extern __inline __m256
12566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12567 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12568 {
12569 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12570 (__v8sf)
12571 _mm256_setzero_ps (),
12572 (__mmask8) __U,
12573 _MM_FROUND_CUR_DIRECTION);
12574 }
12575
12576 #ifdef __OPTIMIZE__
12577 extern __inline __m512
12578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12579 _mm512_getexp_ps (__m512 __A)
12580 {
12581 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12582 (__v16sf)
12583 _mm512_undefined_ps (),
12584 (__mmask16) -1,
12585 _MM_FROUND_CUR_DIRECTION);
12586 }
12587
12588 extern __inline __m512
12589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12590 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12591 {
12592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12593 (__v16sf) __W,
12594 (__mmask16) __U,
12595 _MM_FROUND_CUR_DIRECTION);
12596 }
12597
12598 extern __inline __m512
12599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12600 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12601 {
12602 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12603 (__v16sf)
12604 _mm512_setzero_ps (),
12605 (__mmask16) __U,
12606 _MM_FROUND_CUR_DIRECTION);
12607 }
12608
12609 extern __inline __m512d
12610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12611 _mm512_getexp_pd (__m512d __A)
12612 {
12613 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12614 (__v8df)
12615 _mm512_undefined_pd (),
12616 (__mmask8) -1,
12617 _MM_FROUND_CUR_DIRECTION);
12618 }
12619
12620 extern __inline __m512d
12621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12622 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12623 {
12624 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12625 (__v8df) __W,
12626 (__mmask8) __U,
12627 _MM_FROUND_CUR_DIRECTION);
12628 }
12629
12630 extern __inline __m512d
12631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12633 {
12634 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12635 (__v8df)
12636 _mm512_setzero_pd (),
12637 (__mmask8) __U,
12638 _MM_FROUND_CUR_DIRECTION);
12639 }
12640
12641 extern __inline __m128
12642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643 _mm_getexp_ss (__m128 __A, __m128 __B)
12644 {
12645 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12646 (__v4sf) __B,
12647 _MM_FROUND_CUR_DIRECTION);
12648 }
12649
12650 extern __inline __m128d
12651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12652 _mm_getexp_sd (__m128d __A, __m128d __B)
12653 {
12654 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12655 (__v2df) __B,
12656 _MM_FROUND_CUR_DIRECTION);
12657 }
12658
12659 extern __inline __m512d
12660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12661 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12662 _MM_MANTISSA_SIGN_ENUM __C)
12663 {
12664 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12665 (__C << 2) | __B,
12666 _mm512_undefined_pd (),
12667 (__mmask8) -1,
12668 _MM_FROUND_CUR_DIRECTION);
12669 }
12670
12671 extern __inline __m512d
12672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12673 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12674 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12675 {
12676 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12677 (__C << 2) | __B,
12678 (__v8df) __W, __U,
12679 _MM_FROUND_CUR_DIRECTION);
12680 }
12681
12682 extern __inline __m512d
12683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12684 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12685 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12686 {
12687 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12688 (__C << 2) | __B,
12689 (__v8df)
12690 _mm512_setzero_pd (),
12691 __U,
12692 _MM_FROUND_CUR_DIRECTION);
12693 }
12694
12695 extern __inline __m512
12696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12697 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12698 _MM_MANTISSA_SIGN_ENUM __C)
12699 {
12700 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12701 (__C << 2) | __B,
12702 _mm512_undefined_ps (),
12703 (__mmask16) -1,
12704 _MM_FROUND_CUR_DIRECTION);
12705 }
12706
12707 extern __inline __m512
12708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12709 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12710 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12711 {
12712 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12713 (__C << 2) | __B,
12714 (__v16sf) __W, __U,
12715 _MM_FROUND_CUR_DIRECTION);
12716 }
12717
12718 extern __inline __m512
12719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12720 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12721 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12722 {
12723 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12724 (__C << 2) | __B,
12725 (__v16sf)
12726 _mm512_setzero_ps (),
12727 __U,
12728 _MM_FROUND_CUR_DIRECTION);
12729 }
12730
12731 extern __inline __m128d
12732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12733 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12734 _MM_MANTISSA_SIGN_ENUM __D)
12735 {
12736 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12737 (__v2df) __B,
12738 (__D << 2) | __C,
12739 _MM_FROUND_CUR_DIRECTION);
12740 }
12741
12742 extern __inline __m128
12743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12745 _MM_MANTISSA_SIGN_ENUM __D)
12746 {
12747 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12748 (__v4sf) __B,
12749 (__D << 2) | __C,
12750 _MM_FROUND_CUR_DIRECTION);
12751 }
12752
12753 #else
/* Macro forms of the 512-bit getmant intrinsics for builds where
   __OPTIMIZE__ is not defined.  The control operand is ((C) << 2) | (B);
   the unmasked forms pass an undefined vector and an all-ones mask, the
   maskz forms pass a zero vector.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_ps(X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar getmant intrinsics for builds where
   __OPTIMIZE__ is not defined.  The control operand is ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(((D)<<2) | (C)), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128)__builtin_ia32_getmantss_round ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(((D)<<2) | (C)), \
    _MM_FROUND_CUR_DIRECTION))
12805
/* Macro forms of the scalar getexp intrinsics for builds where __OPTIMIZE__
   is not defined.  They expand to the same three-operand *_round builtins
   that the inline definitions of _mm_getexp_ss and _mm_getexp_sd call, so
   both build modes reach the same builtin with the same operand list
   (A, B, rounding).  */
#define _mm_getexp_ss(A, B) \
  ((__m128)__builtin_ia32_getexpss128_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_getexp_sd(A, B) \
  ((__m128d)__builtin_ia32_getexpsd128_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    _MM_FROUND_CUR_DIRECTION))
12813
/* Macro forms of the 512-bit getexp intrinsics for builds where
   __OPTIMIZE__ is not defined.  The unmasked forms pass an undefined vector
   and an all-ones mask; the maskz forms pass a zero vector.  */
#define _mm512_getexp_ps(A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
    (__v16sf)(__m512)(A), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
    (__v16sf)(__m512)(A), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_getexp_pd(A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
    (__v8df)(__m512d)(A), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
    (__v8df)(__m512d)(A), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12837 #endif
12838
12839 #ifdef __OPTIMIZE__
12840 extern __inline __m512
12841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12842 _mm512_roundscale_ps (__m512 __A, const int __imm)
12843 {
12844 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12845 (__v16sf)
12846 _mm512_undefined_ps (),
12847 -1,
12848 _MM_FROUND_CUR_DIRECTION);
12849 }
12850
12851 extern __inline __m512
12852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12853 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12854 const int __imm)
12855 {
12856 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12857 (__v16sf) __A,
12858 (__mmask16) __B,
12859 _MM_FROUND_CUR_DIRECTION);
12860 }
12861
12862 extern __inline __m512
12863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12864 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12865 {
12866 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12867 __imm,
12868 (__v16sf)
12869 _mm512_setzero_ps (),
12870 (__mmask16) __A,
12871 _MM_FROUND_CUR_DIRECTION);
12872 }
12873
12874 extern __inline __m512d
12875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12876 _mm512_roundscale_pd (__m512d __A, const int __imm)
12877 {
12878 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12879 (__v8df)
12880 _mm512_undefined_pd (),
12881 -1,
12882 _MM_FROUND_CUR_DIRECTION);
12883 }
12884
12885 extern __inline __m512d
12886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12887 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12888 const int __imm)
12889 {
12890 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12891 (__v8df) __A,
12892 (__mmask8) __B,
12893 _MM_FROUND_CUR_DIRECTION);
12894 }
12895
12896 extern __inline __m512d
12897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12898 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12899 {
12900 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12901 __imm,
12902 (__v8df)
12903 _mm512_setzero_pd (),
12904 (__mmask8) __A,
12905 _MM_FROUND_CUR_DIRECTION);
12906 }
12907
12908 extern __inline __m128
12909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12910 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12911 {
12912 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12913 (__v4sf) __B, __imm,
12914 _MM_FROUND_CUR_DIRECTION);
12915 }
12916
12917 extern __inline __m128d
12918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12920 {
12921 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12922 (__v2df) __B, __imm,
12923 _MM_FROUND_CUR_DIRECTION);
12924 }
12925
12926 #else
/* Macro forms of the roundscale intrinsics for builds where __OPTIMIZE__ is
   not defined.  Argument order follows the inline definitions: for the
   mask forms A is the vector passed as the builtin's third operand, B the
   mask, C the input and D the immediate; for the maskz forms A is the mask,
   B the input and C the immediate.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(A), \
    (int)(B), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)(-1), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(C), \
    (int)(D), \
    (__v16sf)(__m512)(A), \
    (__mmask16)(B), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(A), \
    (int)(B), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(C), \
    (int)(D), \
    (__v8df)(__m512d)(A), \
    (__mmask8)(B), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(A), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    _MM_FROUND_CUR_DIRECTION))
12959 #endif
12960
12961 #ifdef __OPTIMIZE__
12962 extern __inline __mmask8
12963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12965 {
12966 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12967 (__v8df) __Y, __P,
12968 (__mmask8) -1,
12969 _MM_FROUND_CUR_DIRECTION);
12970 }
12971
12972 extern __inline __mmask16
12973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12975 {
12976 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12977 (__v16sf) __Y, __P,
12978 (__mmask16) -1,
12979 _MM_FROUND_CUR_DIRECTION);
12980 }
12981
12982 extern __inline __mmask16
12983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12984 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12985 {
12986 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12987 (__v16sf) __Y, __P,
12988 (__mmask16) __U,
12989 _MM_FROUND_CUR_DIRECTION);
12990 }
12991
12992 extern __inline __mmask8
12993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12994 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12995 {
12996 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12997 (__v8df) __Y, __P,
12998 (__mmask8) __U,
12999 _MM_FROUND_CUR_DIRECTION);
13000 }
13001
13002 extern __inline __mmask8
13003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13004 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
13005 {
13006 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13007 (__v2df) __Y, __P,
13008 (__mmask8) -1,
13009 _MM_FROUND_CUR_DIRECTION);
13010 }
13011
13012 extern __inline __mmask8
13013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13014 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
13015 {
13016 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13017 (__v2df) __Y, __P,
13018 (__mmask8) __M,
13019 _MM_FROUND_CUR_DIRECTION);
13020 }
13021
13022 extern __inline __mmask8
13023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13024 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
13025 {
13026 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13027 (__v4sf) __Y, __P,
13028 (__mmask8) -1,
13029 _MM_FROUND_CUR_DIRECTION);
13030 }
13031
13032 extern __inline __mmask8
13033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13034 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
13035 {
13036 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13037 (__v4sf) __Y, __P,
13038 (__mmask8) __M,
13039 _MM_FROUND_CUR_DIRECTION);
13040 }
13041
13042 #else
/* Macro forms of the unmasked 512-bit compare intrinsics for builds where
   __OPTIMIZE__ is not defined; both pass an all-ones mask.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(P), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))
13052
/* Macro forms of the masked 512-bit compares and of the scalar compares for
   builds where __OPTIMIZE__ is not defined.  The mask argument M is always
   parenthesised and cast to the mask type, matching the inline definitions,
   so an arbitrary expression can be passed as M without the cast binding
   to only part of it.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(P), \
    (__mmask16)(M), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))
13082 #endif
13083
13084 extern __inline __mmask16
13085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13086 _mm512_kmov (__mmask16 __A)
13087 {
13088 return __builtin_ia32_kmovw (__A);
13089 }
13090
13091 extern __inline __m512
13092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13093 _mm512_castpd_ps (__m512d __A)
13094 {
13095 return (__m512) (__A);
13096 }
13097
13098 extern __inline __m512i
13099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13100 _mm512_castpd_si512 (__m512d __A)
13101 {
13102 return (__m512i) (__A);
13103 }
13104
13105 extern __inline __m512d
13106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13107 _mm512_castps_pd (__m512 __A)
13108 {
13109 return (__m512d) (__A);
13110 }
13111
13112 extern __inline __m512i
13113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114 _mm512_castps_si512 (__m512 __A)
13115 {
13116 return (__m512i) (__A);
13117 }
13118
13119 extern __inline __m512
13120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13121 _mm512_castsi512_ps (__m512i __A)
13122 {
13123 return (__m512) (__A);
13124 }
13125
13126 extern __inline __m512d
13127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13128 _mm512_castsi512_pd (__m512i __A)
13129 {
13130 return (__m512d) (__A);
13131 }
13132
13133 extern __inline __m128d
13134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13135 _mm512_castpd512_pd128 (__m512d __A)
13136 {
13137 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13138 }
13139
13140 extern __inline __m128
13141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13142 _mm512_castps512_ps128 (__m512 __A)
13143 {
13144 return _mm512_extractf32x4_ps(__A, 0);
13145 }
13146
13147 extern __inline __m128i
13148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13149 _mm512_castsi512_si128 (__m512i __A)
13150 {
13151 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13152 }
13153
13154 extern __inline __m256d
13155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13156 _mm512_castpd512_pd256 (__m512d __A)
13157 {
13158 return _mm512_extractf64x4_pd(__A, 0);
13159 }
13160
13161 extern __inline __m256
13162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13163 _mm512_castps512_ps256 (__m512 __A)
13164 {
13165 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13166 }
13167
13168 extern __inline __m256i
13169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13170 _mm512_castsi512_si256 (__m512i __A)
13171 {
13172 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13173 }
13174
13175 extern __inline __m512d
13176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13177 _mm512_castpd128_pd512 (__m128d __A)
13178 {
13179 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13180 }
13181
13182 extern __inline __m512
13183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13184 _mm512_castps128_ps512 (__m128 __A)
13185 {
13186 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13187 }
13188
13189 extern __inline __m512i
13190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13191 _mm512_castsi128_si512 (__m128i __A)
13192 {
13193 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13194 }
13195
13196 extern __inline __m512d
13197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13198 _mm512_castpd256_pd512 (__m256d __A)
13199 {
13200 return __builtin_ia32_pd512_256pd (__A);
13201 }
13202
13203 extern __inline __m512
13204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13205 _mm512_castps256_ps512 (__m256 __A)
13206 {
13207 return __builtin_ia32_ps512_256ps (__A);
13208 }
13209
13210 extern __inline __m512i
13211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13212 _mm512_castsi256_si512 (__m256i __A)
13213 {
13214 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13215 }
13216
13217 extern __inline __mmask16
13218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13219 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13220 {
13221 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13222 (__v16si) __B, 0,
13223 (__mmask16) -1);
13224 }
13225
13226 extern __inline __mmask16
13227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13228 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13229 {
13230 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13231 (__v16si) __B, 0, __U);
13232 }
13233
13234 extern __inline __mmask8
13235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13236 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13237 {
13238 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13239 (__v8di) __B, 0, __U);
13240 }
13241
13242 extern __inline __mmask8
13243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13244 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13245 {
13246 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13247 (__v8di) __B, 0,
13248 (__mmask8) -1);
13249 }
13250
13251 extern __inline __mmask16
13252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13253 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13254 {
13255 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13256 (__v16si) __B, 6,
13257 (__mmask16) -1);
13258 }
13259
13260 extern __inline __mmask16
13261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13262 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13263 {
13264 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13265 (__v16si) __B, 6, __U);
13266 }
13267
13268 extern __inline __mmask8
13269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13270 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13271 {
13272 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13273 (__v8di) __B, 6, __U);
13274 }
13275
13276 extern __inline __mmask8
13277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13279 {
13280 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13281 (__v8di) __B, 6,
13282 (__mmask8) -1);
13283 }
13284
13285 #ifdef __DISABLE_AVX512F__
13286 #undef __DISABLE_AVX512F__
13287 #pragma GCC pop_options
13288 #endif /* __DISABLE_AVX512F__ */
13289
13290 #endif /* _AVX512FINTRIN_H_INCLUDED */