]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
f1717362 1/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
e2098065 2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Element-typed 64-byte vectors used only inside this header to
   implement the intrinsics.  */

/* Floating-point lanes.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* Integer lanes, signed and unsigned, from widest to narrowest.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* Public vector types.  The Intel API lets callers view these through
   other vector types and through their scalar components, so they are
   marked __may_alias__.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Mask types handed to the masked builtins below.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
57
e2098065 58extern __inline __m512i
59__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60_mm512_set_epi64 (long long __A, long long __B, long long __C,
61 long long __D, long long __E, long long __F,
62 long long __G, long long __H)
63{
64 return __extension__ (__m512i) (__v8di)
65 { __H, __G, __F, __E, __D, __C, __B, __A };
66}
67
68/* Create the vector [A B C D E F G H I J K L M N O P]. */
69extern __inline __m512i
70__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71_mm512_set_epi32 (int __A, int __B, int __C, int __D,
72 int __E, int __F, int __G, int __H,
73 int __I, int __J, int __K, int __L,
74 int __M, int __N, int __O, int __P)
75{
76 return __extension__ (__m512i)(__v16si)
77 { __P, __O, __N, __M, __L, __K, __J, __I,
78 __H, __G, __F, __E, __D, __C, __B, __A };
79}
80
81extern __inline __m512d
82__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83_mm512_set_pd (double __A, double __B, double __C, double __D,
84 double __E, double __F, double __G, double __H)
85{
86 return __extension__ (__m512d)
87 { __H, __G, __F, __E, __D, __C, __B, __A };
88}
89
90extern __inline __m512
91__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92_mm512_set_ps (float __A, float __B, float __C, float __D,
93 float __E, float __F, float __G, float __H,
94 float __I, float __J, float __K, float __L,
95 float __M, float __N, float __O, float __P)
96{
97 return __extension__ (__m512)
98 { __P, __O, __N, __M, __L, __K, __J, __I,
99 __H, __G, __F, __E, __D, __C, __B, __A };
100}
101
/* "Reversed" setters: e0 is the lowest element.  Each one forwards to
   the matching _mm512_set_* function with the argument list flipped.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8, \
                    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7, \
                       e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8, \
                 e7, e6, e5, e4, e3, e2, e1, e0)
114
0fc245cd 115extern __inline __m512
116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117_mm512_undefined_ps (void)
118{
119 __m512 __Y = __Y;
120 return __Y;
121}
122
123extern __inline __m512d
124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125_mm512_undefined_pd (void)
126{
127 __m512d __Y = __Y;
128 return __Y;
129}
130
131extern __inline __m512i
132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133_mm512_undefined_si512 (void)
134{
135 __m512i __Y = __Y;
136 return __Y;
137}
138
1ca2dcea 139extern __inline __m512i
140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141_mm512_set1_epi8 (char __A)
142{
143 return __extension__ (__m512i)(__v64qi)
144 { __A, __A, __A, __A, __A, __A, __A, __A,
145 __A, __A, __A, __A, __A, __A, __A, __A,
146 __A, __A, __A, __A, __A, __A, __A, __A,
147 __A, __A, __A, __A, __A, __A, __A, __A,
148 __A, __A, __A, __A, __A, __A, __A, __A,
149 __A, __A, __A, __A, __A, __A, __A, __A,
150 __A, __A, __A, __A, __A, __A, __A, __A,
151 __A, __A, __A, __A, __A, __A, __A, __A };
152}
153
154extern __inline __m512i
155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156_mm512_set1_epi16 (short __A)
157{
158 return __extension__ (__m512i)(__v32hi)
159 { __A, __A, __A, __A, __A, __A, __A, __A,
160 __A, __A, __A, __A, __A, __A, __A, __A,
161 __A, __A, __A, __A, __A, __A, __A, __A,
162 __A, __A, __A, __A, __A, __A, __A, __A };
163}
164
56afa794 165extern __inline __m512d
166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
167_mm512_set1_pd (double __A)
168{
169 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
170 (__v2df) { __A, },
171 (__v8df)
172 _mm512_undefined_pd (),
173 (__mmask8) -1);
174}
175
176extern __inline __m512
177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178_mm512_set1_ps (float __A)
179{
180 return (__m512) __builtin_ia32_broadcastss512 (__extension__
181 (__v4sf) { __A, },
182 (__v16sf)
183 _mm512_undefined_ps (),
184 (__mmask16) -1);
185}
186
1ca2dcea 187/* Create the vector [A B C D A B C D A B C D A B C D]. */
188extern __inline __m512i
189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
191{
192 return __extension__ (__m512i)(__v16si)
193 { __D, __C, __B, __A, __D, __C, __B, __A,
194 __D, __C, __B, __A, __D, __C, __B, __A };
195}
196
197extern __inline __m512i
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm512_set4_epi64 (long long __A, long long __B, long long __C,
200 long long __D)
201{
202 return __extension__ (__m512i) (__v8di)
203 { __D, __C, __B, __A, __D, __C, __B, __A };
204}
205
206extern __inline __m512d
207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208_mm512_set4_pd (double __A, double __B, double __C, double __D)
209{
210 return __extension__ (__m512d)
211 { __D, __C, __B, __A, __D, __C, __B, __A };
212}
213
214extern __inline __m512
215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216_mm512_set4_ps (float __A, float __B, float __C, float __D)
217{
218 return __extension__ (__m512)
219 { __D, __C, __B, __A, __D, __C, __B, __A,
220 __D, __C, __B, __A, __D, __C, __B, __A };
221}
222
/* "Reversed" four-element setters: forward to the matching
   _mm512_set4_* function with the argument list flipped, so that e0 is
   the lowest element of each group.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps (e3, e2, e1, e0)
234
e2098065 235extern __inline __m512
236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
237_mm512_setzero_ps (void)
238{
239 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
240 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
241}
242
243extern __inline __m512d
244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245_mm512_setzero_pd (void)
246{
247 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248}
249
1ca2dcea 250extern __inline __m512i
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252_mm512_setzero_epi32 (void)
253{
254 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
255}
256
e2098065 257extern __inline __m512i
258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259_mm512_setzero_si512 (void)
260{
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262}
263
264extern __inline __m512d
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
267{
268 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
269 (__v8df) __W,
270 (__mmask8) __U);
271}
272
273extern __inline __m512d
274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
276{
277 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
278 (__v8df)
279 _mm512_setzero_pd (),
280 (__mmask8) __U);
281}
282
283extern __inline __m512
284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
286{
287 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
288 (__v16sf) __W,
289 (__mmask16) __U);
290}
291
292extern __inline __m512
293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
294_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
295{
296 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
297 (__v16sf)
298 _mm512_setzero_ps (),
299 (__mmask16) __U);
300}
301
302extern __inline __m512d
303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304_mm512_load_pd (void const *__P)
305{
306 return *(__m512d *) __P;
307}
308
309extern __inline __m512d
310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
312{
313 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
314 (__v8df) __W,
315 (__mmask8) __U);
316}
317
318extern __inline __m512d
319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
321{
322 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
323 (__v8df)
324 _mm512_setzero_pd (),
325 (__mmask8) __U);
326}
327
328extern __inline void
329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330_mm512_store_pd (void *__P, __m512d __A)
331{
332 *(__m512d *) __P = __A;
333}
334
335extern __inline void
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
338{
339 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
340 (__mmask8) __U);
341}
342
343extern __inline __m512
344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345_mm512_load_ps (void const *__P)
346{
347 return *(__m512 *) __P;
348}
349
350extern __inline __m512
351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
353{
354 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
355 (__v16sf) __W,
356 (__mmask16) __U);
357}
358
359extern __inline __m512
360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
361_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
362{
363 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
364 (__v16sf)
365 _mm512_setzero_ps (),
366 (__mmask16) __U);
367}
368
369extern __inline void
370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371_mm512_store_ps (void *__P, __m512 __A)
372{
373 *(__m512 *) __P = __A;
374}
375
376extern __inline void
377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
379{
380 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
381 (__mmask16) __U);
382}
383
384extern __inline __m512i
385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
387{
388 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
389 (__v8di) __W,
390 (__mmask8) __U);
391}
392
393extern __inline __m512i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
396{
397 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
398 (__v8di)
399 _mm512_setzero_si512 (),
400 (__mmask8) __U);
401}
402
403extern __inline __m512i
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm512_load_epi64 (void const *__P)
406{
407 return *(__m512i *) __P;
408}
409
410extern __inline __m512i
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
413{
414 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
415 (__v8di) __W,
416 (__mmask8) __U);
417}
418
419extern __inline __m512i
420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
422{
423 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
424 (__v8di)
425 _mm512_setzero_si512 (),
426 (__mmask8) __U);
427}
428
429extern __inline void
430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431_mm512_store_epi64 (void *__P, __m512i __A)
432{
433 *(__m512i *) __P = __A;
434}
435
436extern __inline void
437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
439{
440 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
441 (__mmask8) __U);
442}
443
444extern __inline __m512i
445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
447{
448 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
449 (__v16si) __W,
450 (__mmask16) __U);
451}
452
453extern __inline __m512i
454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
456{
457 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
458 (__v16si)
459 _mm512_setzero_si512 (),
460 (__mmask16) __U);
461}
462
463extern __inline __m512i
464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465_mm512_load_si512 (void const *__P)
466{
467 return *(__m512i *) __P;
468}
469
470extern __inline __m512i
471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472_mm512_load_epi32 (void const *__P)
473{
474 return *(__m512i *) __P;
475}
476
477extern __inline __m512i
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
480{
481 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
482 (__v16si) __W,
483 (__mmask16) __U);
484}
485
486extern __inline __m512i
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
489{
490 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
491 (__v16si)
492 _mm512_setzero_si512 (),
493 (__mmask16) __U);
494}
495
496extern __inline void
497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
498_mm512_store_si512 (void *__P, __m512i __A)
499{
500 *(__m512i *) __P = __A;
501}
502
503extern __inline void
504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505_mm512_store_epi32 (void *__P, __m512i __A)
506{
507 *(__m512i *) __P = __A;
508}
509
510extern __inline void
511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
513{
514 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
515 (__mmask16) __U);
516}
517
518extern __inline __m512i
519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520_mm512_mullo_epi32 (__m512i __A, __m512i __B)
521{
d521a5b2 522 return (__m512i) ((__v16su) __A * (__v16su) __B);
e2098065 523}
524
525extern __inline __m512i
526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
528{
529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530 (__v16si) __B,
531 (__v16si)
532 _mm512_setzero_si512 (),
533 __M);
534}
535
536extern __inline __m512i
537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
539{
540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541 (__v16si) __B,
542 (__v16si) __W, __M);
543}
544
545extern __inline __m512i
546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
548{
549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550 (__v16si) __Y,
551 (__v16si)
0fc245cd 552 _mm512_undefined_si512 (),
e2098065 553 (__mmask16) -1);
554}
555
556extern __inline __m512i
557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
559{
560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561 (__v16si) __Y,
562 (__v16si) __W,
563 (__mmask16) __U);
564}
565
566extern __inline __m512i
567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
569{
570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571 (__v16si) __Y,
572 (__v16si)
573 _mm512_setzero_si512 (),
574 (__mmask16) __U);
575}
576
577extern __inline __m512i
578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579_mm512_srav_epi32 (__m512i __X, __m512i __Y)
580{
581 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582 (__v16si) __Y,
583 (__v16si)
0fc245cd 584 _mm512_undefined_si512 (),
e2098065 585 (__mmask16) -1);
586}
587
588extern __inline __m512i
589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
591{
592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593 (__v16si) __Y,
594 (__v16si) __W,
595 (__mmask16) __U);
596}
597
598extern __inline __m512i
599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
601{
602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603 (__v16si) __Y,
604 (__v16si)
605 _mm512_setzero_si512 (),
606 (__mmask16) __U);
607}
608
609extern __inline __m512i
610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
612{
613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614 (__v16si) __Y,
615 (__v16si)
0fc245cd 616 _mm512_undefined_si512 (),
e2098065 617 (__mmask16) -1);
618}
619
620extern __inline __m512i
621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
623{
624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625 (__v16si) __Y,
626 (__v16si) __W,
627 (__mmask16) __U);
628}
629
630extern __inline __m512i
631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
633{
634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635 (__v16si) __Y,
636 (__v16si)
637 _mm512_setzero_si512 (),
638 (__mmask16) __U);
639}
640
641extern __inline __m512i
642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643_mm512_add_epi64 (__m512i __A, __m512i __B)
644{
d521a5b2 645 return (__m512i) ((__v8du) __A + (__v8du) __B);
e2098065 646}
647
648extern __inline __m512i
649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
651{
652 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
653 (__v8di) __B,
654 (__v8di) __W,
655 (__mmask8) __U);
656}
657
658extern __inline __m512i
659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
661{
662 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
663 (__v8di) __B,
664 (__v8di)
665 _mm512_setzero_si512 (),
666 (__mmask8) __U);
667}
668
669extern __inline __m512i
670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671_mm512_sub_epi64 (__m512i __A, __m512i __B)
672{
d521a5b2 673 return (__m512i) ((__v8du) __A - (__v8du) __B);
e2098065 674}
675
676extern __inline __m512i
677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
679{
680 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
681 (__v8di) __B,
682 (__v8di) __W,
683 (__mmask8) __U);
684}
685
686extern __inline __m512i
687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
689{
690 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
691 (__v8di) __B,
692 (__v8di)
693 _mm512_setzero_si512 (),
694 (__mmask8) __U);
695}
696
697extern __inline __m512i
698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
700{
701 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
702 (__v8di) __Y,
703 (__v8di)
0fc245cd 704 _mm512_undefined_pd (),
e2098065 705 (__mmask8) -1);
706}
707
708extern __inline __m512i
709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
711{
712 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
713 (__v8di) __Y,
714 (__v8di) __W,
715 (__mmask8) __U);
716}
717
718extern __inline __m512i
719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
721{
722 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
723 (__v8di) __Y,
724 (__v8di)
725 _mm512_setzero_si512 (),
726 (__mmask8) __U);
727}
728
729extern __inline __m512i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm512_srav_epi64 (__m512i __X, __m512i __Y)
732{
733 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
734 (__v8di) __Y,
735 (__v8di)
0fc245cd 736 _mm512_undefined_si512 (),
e2098065 737 (__mmask8) -1);
738}
739
740extern __inline __m512i
741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
743{
744 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
745 (__v8di) __Y,
746 (__v8di) __W,
747 (__mmask8) __U);
748}
749
750extern __inline __m512i
751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
753{
754 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
755 (__v8di) __Y,
756 (__v8di)
757 _mm512_setzero_si512 (),
758 (__mmask8) __U);
759}
760
761extern __inline __m512i
762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
764{
765 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
766 (__v8di) __Y,
767 (__v8di)
0fc245cd 768 _mm512_undefined_si512 (),
e2098065 769 (__mmask8) -1);
770}
771
772extern __inline __m512i
773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
775{
776 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
777 (__v8di) __Y,
778 (__v8di) __W,
779 (__mmask8) __U);
780}
781
782extern __inline __m512i
783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
785{
786 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
787 (__v8di) __Y,
788 (__v8di)
789 _mm512_setzero_si512 (),
790 (__mmask8) __U);
791}
792
793extern __inline __m512i
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm512_add_epi32 (__m512i __A, __m512i __B)
796{
d521a5b2 797 return (__m512i) ((__v16su) __A + (__v16su) __B);
e2098065 798}
799
800extern __inline __m512i
801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
803{
804 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
805 (__v16si) __B,
806 (__v16si) __W,
807 (__mmask16) __U);
808}
809
810extern __inline __m512i
811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
812_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
813{
814 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
815 (__v16si) __B,
816 (__v16si)
817 _mm512_setzero_si512 (),
818 (__mmask16) __U);
819}
820
821extern __inline __m512i
822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823_mm512_mul_epi32 (__m512i __X, __m512i __Y)
824{
825 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
826 (__v16si) __Y,
827 (__v8di)
0fc245cd 828 _mm512_undefined_si512 (),
e2098065 829 (__mmask8) -1);
830}
831
832extern __inline __m512i
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
835{
836 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
837 (__v16si) __Y,
838 (__v8di) __W, __M);
839}
840
841extern __inline __m512i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
844{
845 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
846 (__v16si) __Y,
847 (__v8di)
848 _mm512_setzero_si512 (),
849 __M);
850}
851
852extern __inline __m512i
853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854_mm512_sub_epi32 (__m512i __A, __m512i __B)
855{
d521a5b2 856 return (__m512i) ((__v16su) __A - (__v16su) __B);
e2098065 857}
858
859extern __inline __m512i
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
862{
863 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
864 (__v16si) __B,
865 (__v16si) __W,
866 (__mmask16) __U);
867}
868
869extern __inline __m512i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
872{
873 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
874 (__v16si) __B,
875 (__v16si)
876 _mm512_setzero_si512 (),
877 (__mmask16) __U);
878}
879
880extern __inline __m512i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm512_mul_epu32 (__m512i __X, __m512i __Y)
883{
884 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
885 (__v16si) __Y,
886 (__v8di)
0fc245cd 887 _mm512_undefined_si512 (),
e2098065 888 (__mmask8) -1);
889}
890
891extern __inline __m512i
892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
894{
895 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
896 (__v16si) __Y,
897 (__v8di) __W, __M);
898}
899
900extern __inline __m512i
901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
903{
904 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
905 (__v16si) __Y,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 __M);
909}
910
/* Left shift of 64-bit lanes by a scalar count, in plain, merging
   (mask) and zeroing (maskz) forms.  With __OPTIMIZE__ defined these
   are always-inline functions; otherwise they are macros that expand
   straight to the builtin (presumably so that the count still reaches
   the builtin as a constant when nothing is inlined -- confirm against
   the builtin's requirements).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __dontcare = (__v8di) _mm512_undefined_si512 ();

  /* __B is handed to the builtin directly, without an intermediate.  */
  return (__m512i) __builtin_ia32_psllqi512_mask (__vals, __B, __dontcare,
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __passthru = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask (__vals, __B, __passthru,
                                                  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask (__vals, __B, __zero,
                                                  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_si512 (), \
     (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
#endif
957
958extern __inline __m512i
959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960_mm512_sll_epi64 (__m512i __A, __m128i __B)
961{
962 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
963 (__v2di) __B,
964 (__v8di)
0fc245cd 965 _mm512_undefined_si512 (),
e2098065 966 (__mmask8) -1);
967}
968
969extern __inline __m512i
970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
972{
973 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
974 (__v2di) __B,
975 (__v8di) __W,
976 (__mmask8) __U);
977}
978
979extern __inline __m512i
980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
982{
983 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
984 (__v2di) __B,
985 (__v8di)
986 _mm512_setzero_si512 (),
987 (__mmask8) __U);
988}
989
/* Right shift of 64-bit lanes by a scalar count through the psrlqi
   builtin, in plain, merging (mask) and zeroing (maskz) forms.  With
   __OPTIMIZE__ defined these are always-inline functions; otherwise
   they are macros that expand straight to the builtin (presumably so
   that the count still reaches the builtin as a constant when nothing
   is inlined -- confirm against the builtin's requirements).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __dontcare = (__v8di) _mm512_undefined_si512 ();

  /* __B is handed to the builtin directly, without an intermediate.  */
  return (__m512i) __builtin_ia32_psrlqi512_mask (__vals, __B, __dontcare,
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __passthru = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__vals, __B, __passthru,
                                                  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __vals = (__v8di) __A;
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask (__vals, __B, __zero,
                                                  (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_si512 (), \
     (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
#endif
1036
1037extern __inline __m512i
1038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039_mm512_srl_epi64 (__m512i __A, __m128i __B)
1040{
1041 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1042 (__v2di) __B,
1043 (__v8di)
0fc245cd 1044 _mm512_undefined_si512 (),
e2098065 1045 (__mmask8) -1);
1046}
1047
1048extern __inline __m512i
1049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1051{
1052 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1053 (__v2di) __B,
1054 (__v8di) __W,
1055 (__mmask8) __U);
1056}
1057
1058extern __inline __m512i
1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1061{
1062 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1063 (__v2di) __B,
1064 (__v8di)
1065 _mm512_setzero_si512 (),
1066 (__mmask8) __U);
1067}
1068
#ifdef __OPTIMIZE__
/* With __OPTIMIZE__ defined the immediate-count forms are inline
   functions; __B is forwarded unchanged as the second operand of
   __builtin_ia32_psraqi512_mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __undef_v = (__v8di) _mm512_undefined_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __undef_v, (__mmask8) -1);
}

/* Masked form: __W is the third builtin operand and __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  __m512i __res
    = (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                               (__v8di) __W, (__mmask8) __U);
  return __res;
}

/* Zero-masked form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __zero_v = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __zero_v, (__mmask8) __U);
}
#else
/* Without __OPTIMIZE__ the same names are macros expanding to the same
   builtin; the count is converted with (int).  */
#define _mm512_srai_epi64(A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(A), (int)(N), \
     (__v8di)(__m512i)_mm512_undefined_si512 (), (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(A), (int)(N), \
     (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, A, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(A), (int)(N), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
#endif
1115
1116extern __inline __m512i
1117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118_mm512_sra_epi64 (__m512i __A, __m128i __B)
1119{
1120 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1121 (__v2di) __B,
1122 (__v8di)
0fc245cd 1123 _mm512_undefined_si512 (),
e2098065 1124 (__mmask8) -1);
1125}
1126
1127extern __inline __m512i
1128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1130{
1131 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1132 (__v2di) __B,
1133 (__v8di) __W,
1134 (__mmask8) __U);
1135}
1136
1137extern __inline __m512i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1140{
1141 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1142 (__v2di) __B,
1143 (__v8di)
1144 _mm512_setzero_si512 (),
1145 (__mmask8) __U);
1146}
1147
#ifdef __OPTIMIZE__
/* With __OPTIMIZE__ defined the immediate-count forms are inline
   functions; __B is forwarded unchanged as the second operand of
   __builtin_ia32_pslldi512_mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef_v = (__v16si) _mm512_undefined_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __undef_v, (__mmask16) -1);
}

/* Masked form: __W is the third builtin operand and __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  __m512i __res
    = (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                               (__v16si) __W,
                                               (__mmask16) __U);
  return __res;
}

/* Zero-masked form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero_v = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __zero_v, (__mmask16) __U);
}
#else
/* Without __OPTIMIZE__ the same names are macros expanding to the same
   builtin; the count is converted with (int).  */
#define _mm512_slli_epi32(A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, A, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1194
1195extern __inline __m512i
1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197_mm512_sll_epi32 (__m512i __A, __m128i __B)
1198{
1199 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1200 (__v4si) __B,
1201 (__v16si)
0fc245cd 1202 _mm512_undefined_si512 (),
e2098065 1203 (__mmask16) -1);
1204}
1205
1206extern __inline __m512i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1209{
1210 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1211 (__v4si) __B,
1212 (__v16si) __W,
1213 (__mmask16) __U);
1214}
1215
1216extern __inline __m512i
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1219{
1220 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1221 (__v4si) __B,
1222 (__v16si)
1223 _mm512_setzero_si512 (),
1224 (__mmask16) __U);
1225}
1226
#ifdef __OPTIMIZE__
/* With __OPTIMIZE__ defined the immediate-count forms are inline
   functions; __B is forwarded unchanged as the second operand of
   __builtin_ia32_psrldi512_mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef_v = (__v16si) _mm512_undefined_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __undef_v, (__mmask16) -1);
}

/* Masked form: __W is the third builtin operand and __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  __m512i __res
    = (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                               (__v16si) __W,
                                               (__mmask16) __U);
  return __res;
}

/* Zero-masked form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero_v = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __zero_v, (__mmask16) __U);
}
#else
/* Without __OPTIMIZE__ the same names are macros expanding to the same
   builtin; the count is converted with (int).  */
#define _mm512_srli_epi32(A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, A, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1273
1274extern __inline __m512i
1275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276_mm512_srl_epi32 (__m512i __A, __m128i __B)
1277{
1278 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1279 (__v4si) __B,
1280 (__v16si)
0fc245cd 1281 _mm512_undefined_si512 (),
e2098065 1282 (__mmask16) -1);
1283}
1284
1285extern __inline __m512i
1286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1287_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1288{
1289 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1290 (__v4si) __B,
1291 (__v16si) __W,
1292 (__mmask16) __U);
1293}
1294
1295extern __inline __m512i
1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1298{
1299 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1300 (__v4si) __B,
1301 (__v16si)
1302 _mm512_setzero_si512 (),
1303 (__mmask16) __U);
1304}
1305
#ifdef __OPTIMIZE__
/* With __OPTIMIZE__ defined the immediate-count forms are inline
   functions; __B is forwarded unchanged as the second operand of
   __builtin_ia32_psradi512_mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef_v = (__v16si) _mm512_undefined_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __undef_v, (__mmask16) -1);
}

/* Masked form: __W is the third builtin operand and __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  __m512i __res
    = (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                               (__v16si) __W,
                                               (__mmask16) __U);
  return __res;
}

/* Zero-masked form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero_v = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __zero_v, (__mmask16) __U);
}
#else
/* Without __OPTIMIZE__ the same names are macros expanding to the same
   builtin; the count is converted with (int).  */
#define _mm512_srai_epi32(A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, A, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(A), (int)(N), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
#endif
1352
1353extern __inline __m512i
1354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355_mm512_sra_epi32 (__m512i __A, __m128i __B)
1356{
1357 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1358 (__v4si) __B,
1359 (__v16si)
0fc245cd 1360 _mm512_undefined_si512 (),
e2098065 1361 (__mmask16) -1);
1362}
1363
1364extern __inline __m512i
1365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1367{
1368 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1369 (__v4si) __B,
1370 (__v16si) __W,
1371 (__mmask16) __U);
1372}
1373
1374extern __inline __m512i
1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1377{
1378 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1379 (__v4si) __B,
1380 (__v16si)
1381 _mm512_setzero_si512 (),
1382 (__mmask16) __U);
1383}
1384
0b7cc9c6 1385#ifdef __OPTIMIZE__
1386extern __inline __m128d
1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1389{
1390 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1391 (__v2df) __B,
1392 __R);
1393}
1394
1395extern __inline __m128
1396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1398{
1399 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1400 (__v4sf) __B,
1401 __R);
1402}
1403
1404extern __inline __m128d
1405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1407{
1408 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1409 (__v2df) __B,
1410 __R);
1411}
1412
1413extern __inline __m128
1414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1416{
1417 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1418 (__v4sf) __B,
1419 __R);
1420}
1421
1422#else
/* Macro forms of the scalar add/sub rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that the cast binds to the whole builtin call
   even when the macro result is followed by a postfix operator or is the
   operand of sizeof; this keeps the macros interchangeable with the
   inline-function forms.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round (A, B, C))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round (A, B, C))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round (A, B, C))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round (A, B, C))
1434#endif
1435
e2098065 1436#ifdef __OPTIMIZE__
1437extern __inline __m512i
1438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1440{
1441 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1442 (__v8di) __B,
1443 (__v8di) __C, imm,
1444 (__mmask8) -1);
1445}
1446
1447extern __inline __m512i
1448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1450 __m512i __C, const int imm)
1451{
1452 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1453 (__v8di) __B,
1454 (__v8di) __C, imm,
1455 (__mmask8) __U);
1456}
1457
1458extern __inline __m512i
1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1461 __m512i __C, const int imm)
1462{
1463 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1464 (__v8di) __B,
1465 (__v8di) __C,
1466 imm, (__mmask8) __U);
1467}
1468
1469extern __inline __m512i
1470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1472{
1473 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1474 (__v16si) __B,
1475 (__v16si) __C,
1476 imm, (__mmask16) -1);
1477}
1478
1479extern __inline __m512i
1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1482 __m512i __C, const int imm)
1483{
1484 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1485 (__v16si) __B,
1486 (__v16si) __C,
1487 imm, (__mmask16) __U);
1488}
1489
1490extern __inline __m512i
1491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1493 __m512i __C, const int imm)
1494{
1495 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1496 (__v16si) __B,
1497 (__v16si) __C,
1498 imm, (__mmask16) __U);
1499}
1500#else
/* Macro forms of the ternary-logic intrinsics, used when __OPTIMIZE__ is
   not defined.  Operands are cast and passed in the order A, B, C,
   immediate, mask; the maskz variants use the separate _maskz builtins.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
     (int)(I), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
     (int)(I), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
     (int)(I), (__mmask8)(U)))

#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
     (int)(I), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
     (int)(I), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
     (int)(I), (__mmask16)(U)))
1522#endif
1523
1524extern __inline __m512d
1525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526_mm512_rcp14_pd (__m512d __A)
1527{
1528 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1529 (__v8df)
0fc245cd 1530 _mm512_undefined_pd (),
e2098065 1531 (__mmask8) -1);
1532}
1533
1534extern __inline __m512d
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1537{
1538 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1539 (__v8df) __W,
1540 (__mmask8) __U);
1541}
1542
1543extern __inline __m512d
1544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1546{
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df)
1549 _mm512_setzero_pd (),
1550 (__mmask8) __U);
1551}
1552
1553extern __inline __m512
1554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1555_mm512_rcp14_ps (__m512 __A)
1556{
1557 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1558 (__v16sf)
0fc245cd 1559 _mm512_undefined_ps (),
e2098065 1560 (__mmask16) -1);
1561}
1562
1563extern __inline __m512
1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1566{
1567 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1568 (__v16sf) __W,
1569 (__mmask16) __U);
1570}
1571
1572extern __inline __m512
1573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1575{
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf)
1578 _mm512_setzero_ps (),
1579 (__mmask16) __U);
1580}
1581
0b7cc9c6 1582extern __inline __m128d
1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584_mm_rcp14_sd (__m128d __A, __m128d __B)
1585{
c4f782fd 1586 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1587 (__v2df) __A);
0b7cc9c6 1588}
1589
1590extern __inline __m128
1591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592_mm_rcp14_ss (__m128 __A, __m128 __B)
1593{
c4f782fd 1594 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1595 (__v4sf) __A);
0b7cc9c6 1596}
1597
e2098065 1598extern __inline __m512d
1599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600_mm512_rsqrt14_pd (__m512d __A)
1601{
1602 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1603 (__v8df)
0fc245cd 1604 _mm512_undefined_pd (),
e2098065 1605 (__mmask8) -1);
1606}
1607
1608extern __inline __m512d
1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1611{
1612 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1613 (__v8df) __W,
1614 (__mmask8) __U);
1615}
1616
1617extern __inline __m512d
1618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1620{
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df)
1623 _mm512_setzero_pd (),
1624 (__mmask8) __U);
1625}
1626
1627extern __inline __m512
1628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629_mm512_rsqrt14_ps (__m512 __A)
1630{
1631 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1632 (__v16sf)
0fc245cd 1633 _mm512_undefined_ps (),
e2098065 1634 (__mmask16) -1);
1635}
1636
1637extern __inline __m512
1638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1640{
1641 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1642 (__v16sf) __W,
1643 (__mmask16) __U);
1644}
1645
1646extern __inline __m512
1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1649{
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf)
1652 _mm512_setzero_ps (),
1653 (__mmask16) __U);
1654}
1655
0b7cc9c6 1656extern __inline __m128d
1657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1659{
c4f782fd 1660 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1661 (__v2df) __A);
0b7cc9c6 1662}
1663
1664extern __inline __m128
1665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1667{
c4f782fd 1668 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1669 (__v4sf) __A);
0b7cc9c6 1670}
1671
e2098065 1672#ifdef __OPTIMIZE__
1673extern __inline __m512d
1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675_mm512_sqrt_round_pd (__m512d __A, const int __R)
1676{
1677 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1678 (__v8df)
0fc245cd 1679 _mm512_undefined_pd (),
e2098065 1680 (__mmask8) -1, __R);
1681}
1682
1683extern __inline __m512d
1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1686 const int __R)
1687{
1688 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1689 (__v8df) __W,
1690 (__mmask8) __U, __R);
1691}
1692
1693extern __inline __m512d
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1696{
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df)
1699 _mm512_setzero_pd (),
1700 (__mmask8) __U, __R);
1701}
1702
1703extern __inline __m512
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm512_sqrt_round_ps (__m512 __A, const int __R)
1706{
1707 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1708 (__v16sf)
0fc245cd 1709 _mm512_undefined_ps (),
e2098065 1710 (__mmask16) -1, __R);
1711}
1712
1713extern __inline __m512
1714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1716{
1717 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1718 (__v16sf) __W,
1719 (__mmask16) __U, __R);
1720}
1721
1722extern __inline __m512
1723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1725{
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf)
1728 _mm512_setzero_ps (),
1729 (__mmask16) __U, __R);
1730}
1731
0b7cc9c6 1732extern __inline __m128d
1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1735{
1736 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1737 (__v2df) __A,
1738 __R);
1739}
1740
1741extern __inline __m128
1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1744{
1745 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1746 (__v4sf) __A,
1747 __R);
1748}
e2098065 1749#else
/* Macro forms of the square-root rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in an outer
   pair of parentheses so the cast applies to the whole builtin call
   regardless of what follows the macro invocation.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, \
     (__v8df) _mm512_undefined_pd (), -1, C))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, W, U, C))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, \
     (__v8df) _mm512_setzero_pd (), U, C))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, \
     (__v16sf) _mm512_undefined_ps (), -1, C))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, W, U, C))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, \
     (__v16sf) _mm512_setzero_ps (), U, C))

/* The scalar builtins take B first and A second, exactly as the
   inline-function definitions of these two intrinsics pass them.  Passing
   (A, B) here would make the result depend on whether __OPTIMIZE__ is
   defined.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round (B, A, C))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round (B, A, C))
e2098065 1773#endif
1774
1775extern __inline __m512i
1776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777_mm512_cvtepi8_epi32 (__m128i __A)
1778{
1779 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1780 (__v16si)
0fc245cd 1781 _mm512_undefined_si512 (),
e2098065 1782 (__mmask16) -1);
1783}
1784
1785extern __inline __m512i
1786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1788{
1789 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1790 (__v16si) __W,
1791 (__mmask16) __U);
1792}
1793
1794extern __inline __m512i
1795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1797{
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si)
1800 _mm512_setzero_si512 (),
1801 (__mmask16) __U);
1802}
1803
1804extern __inline __m512i
1805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1806_mm512_cvtepi8_epi64 (__m128i __A)
1807{
1808 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1809 (__v8di)
0fc245cd 1810 _mm512_undefined_si512 (),
e2098065 1811 (__mmask8) -1);
1812}
1813
1814extern __inline __m512i
1815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1816_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1817{
1818 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1819 (__v8di) __W,
1820 (__mmask8) __U);
1821}
1822
1823extern __inline __m512i
1824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1826{
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di)
1829 _mm512_setzero_si512 (),
1830 (__mmask8) __U);
1831}
1832
1833extern __inline __m512i
1834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835_mm512_cvtepi16_epi32 (__m256i __A)
1836{
1837 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1838 (__v16si)
0fc245cd 1839 _mm512_undefined_si512 (),
e2098065 1840 (__mmask16) -1);
1841}
1842
1843extern __inline __m512i
1844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1846{
1847 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1848 (__v16si) __W,
1849 (__mmask16) __U);
1850}
1851
1852extern __inline __m512i
1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1855{
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si)
1858 _mm512_setzero_si512 (),
1859 (__mmask16) __U);
1860}
1861
1862extern __inline __m512i
1863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864_mm512_cvtepi16_epi64 (__m128i __A)
1865{
1866 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1867 (__v8di)
0fc245cd 1868 _mm512_undefined_si512 (),
e2098065 1869 (__mmask8) -1);
1870}
1871
1872extern __inline __m512i
1873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1875{
1876 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1877 (__v8di) __W,
1878 (__mmask8) __U);
1879}
1880
1881extern __inline __m512i
1882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1884{
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di)
1887 _mm512_setzero_si512 (),
1888 (__mmask8) __U);
1889}
1890
1891extern __inline __m512i
1892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893_mm512_cvtepi32_epi64 (__m256i __X)
1894{
1895 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1896 (__v8di)
0fc245cd 1897 _mm512_undefined_si512 (),
e2098065 1898 (__mmask8) -1);
1899}
1900
1901extern __inline __m512i
1902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1903_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1904{
1905 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1906 (__v8di) __W,
1907 (__mmask8) __U);
1908}
1909
1910extern __inline __m512i
1911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1913{
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di)
1916 _mm512_setzero_si512 (),
1917 (__mmask8) __U);
1918}
1919
1920extern __inline __m512i
1921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1922_mm512_cvtepu8_epi32 (__m128i __A)
1923{
1924 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1925 (__v16si)
0fc245cd 1926 _mm512_undefined_si512 (),
e2098065 1927 (__mmask16) -1);
1928}
1929
1930extern __inline __m512i
1931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1933{
1934 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1935 (__v16si) __W,
1936 (__mmask16) __U);
1937}
1938
1939extern __inline __m512i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1942{
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si)
1945 _mm512_setzero_si512 (),
1946 (__mmask16) __U);
1947}
1948
1949extern __inline __m512i
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm512_cvtepu8_epi64 (__m128i __A)
1952{
1953 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1954 (__v8di)
0fc245cd 1955 _mm512_undefined_si512 (),
e2098065 1956 (__mmask8) -1);
1957}
1958
1959extern __inline __m512i
1960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1962{
1963 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964 (__v8di) __W,
1965 (__mmask8) __U);
1966}
1967
1968extern __inline __m512i
1969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1971{
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di)
1974 _mm512_setzero_si512 (),
1975 (__mmask8) __U);
1976}
1977
1978extern __inline __m512i
1979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980_mm512_cvtepu16_epi32 (__m256i __A)
1981{
1982 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983 (__v16si)
0fc245cd 1984 _mm512_undefined_si512 (),
e2098065 1985 (__mmask16) -1);
1986}
1987
1988extern __inline __m512i
1989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1991{
1992 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993 (__v16si) __W,
1994 (__mmask16) __U);
1995}
1996
1997extern __inline __m512i
1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2000{
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si)
2003 _mm512_setzero_si512 (),
2004 (__mmask16) __U);
2005}
2006
2007extern __inline __m512i
2008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009_mm512_cvtepu16_epi64 (__m128i __A)
2010{
2011 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012 (__v8di)
0fc245cd 2013 _mm512_undefined_si512 (),
e2098065 2014 (__mmask8) -1);
2015}
2016
2017extern __inline __m512i
2018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2020{
2021 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022 (__v8di) __W,
2023 (__mmask8) __U);
2024}
2025
2026extern __inline __m512i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2029{
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di)
2032 _mm512_setzero_si512 (),
2033 (__mmask8) __U);
2034}
2035
2036extern __inline __m512i
2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038_mm512_cvtepu32_epi64 (__m256i __X)
2039{
2040 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041 (__v8di)
0fc245cd 2042 _mm512_undefined_si512 (),
e2098065 2043 (__mmask8) -1);
2044}
2045
2046extern __inline __m512i
2047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2049{
2050 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051 (__v8di) __W,
2052 (__mmask8) __U);
2053}
2054
2055extern __inline __m512i
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2058{
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di)
2061 _mm512_setzero_si512 (),
2062 (__mmask8) __U);
2063}
2064
2065#ifdef __OPTIMIZE__
2066extern __inline __m512d
2067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2069{
2070 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071 (__v8df) __B,
2072 (__v8df)
0fc245cd 2073 _mm512_undefined_pd (),
e2098065 2074 (__mmask8) -1, __R);
2075}
2076
2077extern __inline __m512d
2078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080 __m512d __B, const int __R)
2081{
2082 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083 (__v8df) __B,
2084 (__v8df) __W,
2085 (__mmask8) __U, __R);
2086}
2087
2088extern __inline __m512d
2089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091 const int __R)
2092{
2093 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094 (__v8df) __B,
2095 (__v8df)
2096 _mm512_setzero_pd (),
2097 (__mmask8) __U, __R);
2098}
2099
2100extern __inline __m512
2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2103{
2104 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105 (__v16sf) __B,
2106 (__v16sf)
0fc245cd 2107 _mm512_undefined_ps (),
e2098065 2108 (__mmask16) -1, __R);
2109}
2110
2111extern __inline __m512
2112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114 __m512 __B, const int __R)
2115{
2116 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117 (__v16sf) __B,
2118 (__v16sf) __W,
2119 (__mmask16) __U, __R);
2120}
2121
2122extern __inline __m512
2123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2125{
2126 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127 (__v16sf) __B,
2128 (__v16sf)
2129 _mm512_setzero_ps (),
2130 (__mmask16) __U, __R);
2131}
2132
2133extern __inline __m512d
2134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2136{
2137 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
0fc245cd 2140 _mm512_undefined_pd (),
e2098065 2141 (__mmask8) -1, __R);
2142}
2143
2144extern __inline __m512d
2145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2148{
2149 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2153}
2154
2155extern __inline __m512d
2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2159{
2160 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2165}
2166
2167extern __inline __m512
2168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2170{
2171 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
0fc245cd 2174 _mm512_undefined_ps (),
e2098065 2175 (__mmask16) -1, __R);
2176}
2177
2178extern __inline __m512
2179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2182{
2183 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2187}
2188
2189extern __inline __m512
2190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192{
2193 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2198}
2199#else
/* Macro forms of the add_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_addpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_add_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2208
/* Macro forms of the add_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_addps512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_add_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2217
/* Macro forms of the sub_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_subpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_sub_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2226
/* Macro forms of the sub_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_subps512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_sub_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2235#endif
2236
2237#ifdef __OPTIMIZE__
2238extern __inline __m512d
2239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2241{
2242 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243 (__v8df) __B,
2244 (__v8df)
0fc245cd 2245 _mm512_undefined_pd (),
e2098065 2246 (__mmask8) -1, __R);
2247}
2248
2249extern __inline __m512d
2250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252 __m512d __B, const int __R)
2253{
2254 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255 (__v8df) __B,
2256 (__v8df) __W,
2257 (__mmask8) __U, __R);
2258}
2259
2260extern __inline __m512d
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263 const int __R)
2264{
2265 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266 (__v8df) __B,
2267 (__v8df)
2268 _mm512_setzero_pd (),
2269 (__mmask8) __U, __R);
2270}
2271
2272extern __inline __m512
2273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2275{
2276 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277 (__v16sf) __B,
2278 (__v16sf)
0fc245cd 2279 _mm512_undefined_ps (),
e2098065 2280 (__mmask16) -1, __R);
2281}
2282
2283extern __inline __m512
2284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286 __m512 __B, const int __R)
2287{
2288 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289 (__v16sf) __B,
2290 (__v16sf) __W,
2291 (__mmask16) __U, __R);
2292}
2293
2294extern __inline __m512
2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2297{
2298 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299 (__v16sf) __B,
2300 (__v16sf)
2301 _mm512_setzero_ps (),
2302 (__mmask16) __U, __R);
2303}
2304
2305extern __inline __m512d
2306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2308{
2309 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310 (__v8df) __V,
2311 (__v8df)
0fc245cd 2312 _mm512_undefined_pd (),
e2098065 2313 (__mmask8) -1, __R);
2314}
2315
2316extern __inline __m512d
2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319 __m512d __V, const int __R)
2320{
2321 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322 (__v8df) __V,
2323 (__v8df) __W,
2324 (__mmask8) __U, __R);
2325}
2326
2327extern __inline __m512d
2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330 const int __R)
2331{
2332 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333 (__v8df) __V,
2334 (__v8df)
2335 _mm512_setzero_pd (),
2336 (__mmask8) __U, __R);
2337}
2338
2339extern __inline __m512
2340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2342{
2343 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 (__v16sf)
0fc245cd 2346 _mm512_undefined_ps (),
e2098065 2347 (__mmask16) -1, __R);
2348}
2349
2350extern __inline __m512
2351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353 __m512 __B, const int __R)
2354{
2355 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356 (__v16sf) __B,
2357 (__v16sf) __W,
2358 (__mmask16) __U, __R);
2359}
2360
2361extern __inline __m512
2362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2364{
2365 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366 (__v16sf) __B,
2367 (__v16sf)
2368 _mm512_setzero_ps (),
2369 (__mmask16) __U, __R);
2370}
2371
0b7cc9c6 2372extern __inline __m128d
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2375{
2376 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377 (__v2df) __B,
2378 __R);
2379}
2380
2381extern __inline __m128
2382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2384{
2385 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386 (__v4sf) __B,
2387 __R);
2388}
2389
2390extern __inline __m128d
2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2393{
2394 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395 (__v2df) __B,
2396 __R);
2397}
2398
2399extern __inline __m128
2400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2402{
2403 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404 (__v4sf) __B,
2405 __R);
2406}
2407
e2098065 2408#else
/* Macro forms of the mul_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_mulpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_mul_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2417
/* Macro forms of the mul_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_mulps512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_mul_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2426
/* Macro forms of the div_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_divpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_div_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2435
/* Macro forms of the div_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_divps512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_div_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
0b7cc9c6 2444
/* Macro forms of the scalar mul/div "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  Each forwards A, B and C to the matching
   __builtin_ia32_*_round builtin.  The whole expansion is parenthesized
   so that the leading cast applies to the complete builtin call even
   when the macro is followed by a postfix operator or embedded in a
   larger expression.  */
#define _mm_mul_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mul_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))

#define _mm_div_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_div_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
e2098065 2456#endif
2457
2458#ifdef __OPTIMIZE__
2459extern __inline __m512d
2460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2462{
2463 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464 (__v8df) __B,
2465 (__v8df)
0fc245cd 2466 _mm512_undefined_pd (),
e2098065 2467 (__mmask8) -1, __R);
2468}
2469
2470extern __inline __m512d
2471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473 __m512d __B, const int __R)
2474{
2475 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476 (__v8df) __B,
2477 (__v8df) __W,
2478 (__mmask8) __U, __R);
2479}
2480
2481extern __inline __m512d
2482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484 const int __R)
2485{
2486 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487 (__v8df) __B,
2488 (__v8df)
2489 _mm512_setzero_pd (),
2490 (__mmask8) __U, __R);
2491}
2492
2493extern __inline __m512
2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2496{
2497 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498 (__v16sf) __B,
2499 (__v16sf)
0fc245cd 2500 _mm512_undefined_ps (),
e2098065 2501 (__mmask16) -1, __R);
2502}
2503
2504extern __inline __m512
2505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507 __m512 __B, const int __R)
2508{
2509 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510 (__v16sf) __B,
2511 (__v16sf) __W,
2512 (__mmask16) __U, __R);
2513}
2514
2515extern __inline __m512
2516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2518{
2519 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520 (__v16sf) __B,
2521 (__v16sf)
2522 _mm512_setzero_ps (),
2523 (__mmask16) __U, __R);
2524}
2525
2526extern __inline __m512d
2527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2529{
2530 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
0fc245cd 2533 _mm512_undefined_pd (),
e2098065 2534 (__mmask8) -1, __R);
2535}
2536
2537extern __inline __m512d
2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2541{
2542 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2546}
2547
2548extern __inline __m512d
2549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2552{
2553 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2558}
2559
2560extern __inline __m512
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2563{
2564 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
0fc245cd 2567 _mm512_undefined_ps (),
e2098065 2568 (__mmask16) -1, __R);
2569}
2570
2571extern __inline __m512
2572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2575{
2576 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2580}
2581
2582extern __inline __m512
2583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2585{
2586 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587 (__v16sf) __B,
2588 (__v16sf)
2589 _mm512_setzero_ps (),
2590 (__mmask16) __U, __R);
2591}
2592#else
/* Macro forms of the max_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_maxpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_max_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2601
/* Macro form of _mm512_max_round_ps, used when __OPTIMIZE__ is not
   defined.  Expands to __builtin_ia32_maxps512_mask with an all-ones
   mask.  The placeholder third operand is taken from
   _mm512_undefined_ps () so that it has the single-precision type that
   matches the __v16sf cast, as the inline-function form of this
   intrinsic does.  The expansion is parenthesized so the leading cast
   always applies to the complete builtin call.  */
#define _mm512_max_round_ps(A, B,  R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
e2098065 2604
/* Macro forms of the masked max_round_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to a call of
   __builtin_ia32_maxps512_mask.  The whole expansion is parenthesized so
   that the leading cast applies to the complete builtin call even when
   the macro is followed by a postfix operator or embedded in a larger
   expression.  */
#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2610
/* Macro forms of the min_round_pd intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_minpd512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_min_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2619
/* Macro forms of the min_round_ps intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_minps512_mask.
   The whole expansion is parenthesized so that the leading cast applies
   to the complete builtin call even when the macro is followed by a
   postfix operator or embedded in a larger expression.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2628#endif
2629
2630#ifdef __OPTIMIZE__
2631extern __inline __m512d
2632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2634{
2635 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636 (__v8df) __B,
2637 (__v8df)
0fc245cd 2638 _mm512_undefined_pd (),
e2098065 2639 (__mmask8) -1, __R);
2640}
2641
2642extern __inline __m512d
2643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645 __m512d __B, const int __R)
2646{
2647 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648 (__v8df) __B,
2649 (__v8df) __W,
2650 (__mmask8) __U, __R);
2651}
2652
2653extern __inline __m512d
2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656 const int __R)
2657{
2658 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659 (__v8df) __B,
2660 (__v8df)
2661 _mm512_setzero_pd (),
2662 (__mmask8) __U, __R);
2663}
2664
2665extern __inline __m512
2666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2668{
2669 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670 (__v16sf) __B,
2671 (__v16sf)
0fc245cd 2672 _mm512_undefined_ps (),
e2098065 2673 (__mmask16) -1, __R);
2674}
2675
2676extern __inline __m512
2677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679 __m512 __B, const int __R)
2680{
2681 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682 (__v16sf) __B,
2683 (__v16sf) __W,
2684 (__mmask16) __U, __R);
2685}
2686
2687extern __inline __m512
2688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690 const int __R)
2691{
2692 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693 (__v16sf) __B,
2694 (__v16sf)
2695 _mm512_setzero_ps (),
2696 (__mmask16) __U, __R);
2697}
2698
0b7cc9c6 2699extern __inline __m128d
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2702{
2703 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704 (__v2df) __B,
2705 __R);
2706}
2707
2708extern __inline __m128
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2711{
2712 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713 (__v4sf) __B,
2714 __R);
2715}
e2098065 2716#else
/* Macro forms of the scalef_round_pd intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to a call of
   __builtin_ia32_scalefpd512_mask.  The whole expansion is parenthesized
   so that the leading cast applies to the complete builtin call even
   when the macro is followed by a postfix operator or embedded in a
   larger expression.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2725
/* Macro forms of the scalef_round_ps intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to a call of
   __builtin_ia32_scalefps512_mask.  The whole expansion is parenthesized
   so that the leading cast applies to the complete builtin call even
   when the macro is followed by a postfix operator or embedded in a
   larger expression.  */
#define _mm512_scalef_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
0b7cc9c6 2734
/* Macro forms of the scalar scalef "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  Each forwards A, B and C to the matching
   __builtin_ia32_scalef*_round builtin.  The whole expansion is
   parenthesized so that the leading cast applies to the complete builtin
   call even when the macro is followed by a postfix operator or embedded
   in a larger expression.  */
#define _mm_scalef_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))

#define _mm_scalef_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
e2098065 2740#endif
2741
2742#ifdef __OPTIMIZE__
2743extern __inline __m512d
2744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2746{
2747 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748 (__v8df) __B,
2749 (__v8df) __C,
2750 (__mmask8) -1, __R);
2751}
2752
2753extern __inline __m512d
2754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756 __m512d __C, const int __R)
2757{
2758 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759 (__v8df) __B,
2760 (__v8df) __C,
2761 (__mmask8) __U, __R);
2762}
2763
2764extern __inline __m512d
2765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767 __mmask8 __U, const int __R)
2768{
2769 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770 (__v8df) __B,
2771 (__v8df) __C,
2772 (__mmask8) __U, __R);
2773}
2774
2775extern __inline __m512d
2776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778 __m512d __C, const int __R)
2779{
2780 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U, __R);
2784}
2785
2786extern __inline __m512
2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2789{
2790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 (__v16sf) __B,
2792 (__v16sf) __C,
2793 (__mmask16) -1, __R);
2794}
2795
2796extern __inline __m512
2797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799 __m512 __C, const int __R)
2800{
2801 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802 (__v16sf) __B,
2803 (__v16sf) __C,
2804 (__mmask16) __U, __R);
2805}
2806
2807extern __inline __m512
2808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810 __mmask16 __U, const int __R)
2811{
2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813 (__v16sf) __B,
2814 (__v16sf) __C,
2815 (__mmask16) __U, __R);
2816}
2817
2818extern __inline __m512
2819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821 __m512 __C, const int __R)
2822{
2823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824 (__v16sf) __B,
2825 (__v16sf) __C,
2826 (__mmask16) __U, __R);
2827}
2828
2829extern __inline __m512d
2830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2832{
2833 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834 (__v8df) __B,
2835 -(__v8df) __C,
2836 (__mmask8) -1, __R);
2837}
2838
2839extern __inline __m512d
2840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842 __m512d __C, const int __R)
2843{
2844 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845 (__v8df) __B,
2846 -(__v8df) __C,
2847 (__mmask8) __U, __R);
2848}
2849
2850extern __inline __m512d
2851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853 __mmask8 __U, const int __R)
2854{
2855 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856 (__v8df) __B,
2857 (__v8df) __C,
2858 (__mmask8) __U, __R);
2859}
2860
2861extern __inline __m512d
2862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864 __m512d __C, const int __R)
2865{
2866 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867 (__v8df) __B,
2868 -(__v8df) __C,
2869 (__mmask8) __U, __R);
2870}
2871
2872extern __inline __m512
2873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2875{
2876 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877 (__v16sf) __B,
2878 -(__v16sf) __C,
2879 (__mmask16) -1, __R);
2880}
2881
2882extern __inline __m512
2883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885 __m512 __C, const int __R)
2886{
2887 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888 (__v16sf) __B,
2889 -(__v16sf) __C,
2890 (__mmask16) __U, __R);
2891}
2892
2893extern __inline __m512
2894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896 __mmask16 __U, const int __R)
2897{
2898 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899 (__v16sf) __B,
2900 (__v16sf) __C,
2901 (__mmask16) __U, __R);
2902}
2903
2904extern __inline __m512
2905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907 __m512 __C, const int __R)
2908{
2909 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910 (__v16sf) __B,
2911 -(__v16sf) __C,
2912 (__mmask16) __U, __R);
2913}
2914
2915extern __inline __m512d
2916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2918{
2919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 (__v8df) __B,
2921 (__v8df) __C,
2922 (__mmask8) -1, __R);
2923}
2924
2925extern __inline __m512d
2926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928 __m512d __C, const int __R)
2929{
2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931 (__v8df) __B,
2932 (__v8df) __C,
2933 (__mmask8) __U, __R);
2934}
2935
2936extern __inline __m512d
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939 __mmask8 __U, const int __R)
2940{
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942 (__v8df) __B,
2943 (__v8df) __C,
2944 (__mmask8) __U, __R);
2945}
2946
2947extern __inline __m512d
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950 __m512d __C, const int __R)
2951{
2952 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953 (__v8df) __B,
2954 (__v8df) __C,
2955 (__mmask8) __U, __R);
2956}
2957
2958extern __inline __m512
2959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2961{
2962 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963 (__v16sf) __B,
2964 (__v16sf) __C,
2965 (__mmask16) -1, __R);
2966}
2967
2968extern __inline __m512
2969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971 __m512 __C, const int __R)
2972{
2973 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974 (__v16sf) __B,
2975 (__v16sf) __C,
2976 (__mmask16) __U, __R);
2977}
2978
2979extern __inline __m512
2980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982 __mmask16 __U, const int __R)
2983{
2984 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985 (__v16sf) __B,
2986 (__v16sf) __C,
2987 (__mmask16) __U, __R);
2988}
2989
2990extern __inline __m512
2991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993 __m512 __C, const int __R)
2994{
2995 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U, __R);
2999}
3000
3001extern __inline __m512d
3002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3004{
3005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006 (__v8df) __B,
3007 -(__v8df) __C,
3008 (__mmask8) -1, __R);
3009}
3010
3011extern __inline __m512d
3012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014 __m512d __C, const int __R)
3015{
3016 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 -(__v8df) __C,
3019 (__mmask8) __U, __R);
3020}
3021
3022extern __inline __m512d
3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025 __mmask8 __U, const int __R)
3026{
3027 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df) __C,
3030 (__mmask8) __U, __R);
3031}
3032
3033extern __inline __m512d
3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036 __m512d __C, const int __R)
3037{
3038 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039 (__v8df) __B,
3040 -(__v8df) __C,
3041 (__mmask8) __U, __R);
3042}
3043
3044extern __inline __m512
3045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3047{
3048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 (__v16sf) __B,
3050 -(__v16sf) __C,
3051 (__mmask16) -1, __R);
3052}
3053
3054extern __inline __m512
3055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057 __m512 __C, const int __R)
3058{
3059 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060 (__v16sf) __B,
3061 -(__v16sf) __C,
3062 (__mmask16) __U, __R);
3063}
3064
3065extern __inline __m512
3066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068 __mmask16 __U, const int __R)
3069{
3070 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071 (__v16sf) __B,
3072 (__v16sf) __C,
3073 (__mmask16) __U, __R);
3074}
3075
3076extern __inline __m512
3077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079 __m512 __C, const int __R)
3080{
3081 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082 (__v16sf) __B,
3083 -(__v16sf) __C,
3084 (__mmask16) __U, __R);
3085}
3086
3087extern __inline __m512d
3088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3090{
3091 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092 (__v8df) __B,
3093 (__v8df) __C,
3094 (__mmask8) -1, __R);
3095}
3096
3097extern __inline __m512d
3098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100 __m512d __C, const int __R)
3101{
3102 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103 (__v8df) __B,
3104 (__v8df) __C,
3105 (__mmask8) __U, __R);
3106}
3107
3108extern __inline __m512d
3109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111 __mmask8 __U, const int __R)
3112{
3113 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114 (__v8df) __B,
3115 (__v8df) __C,
3116 (__mmask8) __U, __R);
3117}
3118
3119extern __inline __m512d
3120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122 __m512d __C, const int __R)
3123{
3124 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125 (__v8df) __B,
3126 (__v8df) __C,
3127 (__mmask8) __U, __R);
3128}
3129
3130extern __inline __m512
3131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3133{
3134 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135 (__v16sf) __B,
3136 (__v16sf) __C,
3137 (__mmask16) -1, __R);
3138}
3139
3140extern __inline __m512
3141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143 __m512 __C, const int __R)
3144{
3145 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146 (__v16sf) __B,
3147 (__v16sf) __C,
3148 (__mmask16) __U, __R);
3149}
3150
3151extern __inline __m512
3152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154 __mmask16 __U, const int __R)
3155{
3156 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157 (__v16sf) __B,
3158 (__v16sf) __C,
3159 (__mmask16) __U, __R);
3160}
3161
3162extern __inline __m512
3163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165 __m512 __C, const int __R)
3166{
3167 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168 (__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U, __R);
3171}
3172
3173extern __inline __m512d
3174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3176{
3177 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178 (__v8df) __B,
3179 -(__v8df) __C,
3180 (__mmask8) -1, __R);
3181}
3182
3183extern __inline __m512d
3184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186 __m512d __C, const int __R)
3187{
3188 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189 (__v8df) __B,
3190 (__v8df) __C,
3191 (__mmask8) __U, __R);
3192}
3193
3194extern __inline __m512d
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197 __mmask8 __U, const int __R)
3198{
3199 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200 (__v8df) __B,
3201 (__v8df) __C,
3202 (__mmask8) __U, __R);
3203}
3204
3205extern __inline __m512d
3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208 __m512d __C, const int __R)
3209{
3210 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211 (__v8df) __B,
3212 -(__v8df) __C,
3213 (__mmask8) __U, __R);
3214}
3215
3216extern __inline __m512
3217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3219{
3220 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221 (__v16sf) __B,
3222 -(__v16sf) __C,
3223 (__mmask16) -1, __R);
3224}
3225
3226extern __inline __m512
3227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229 __m512 __C, const int __R)
3230{
3231 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232 (__v16sf) __B,
3233 (__v16sf) __C,
3234 (__mmask16) __U, __R);
3235}
3236
3237extern __inline __m512
3238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240 __mmask16 __U, const int __R)
3241{
3242 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243 (__v16sf) __B,
3244 (__v16sf) __C,
3245 (__mmask16) __U, __R);
3246}
3247
3248extern __inline __m512
3249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251 __m512 __C, const int __R)
3252{
3253 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254 (__v16sf) __B,
3255 -(__v16sf) __C,
3256 (__mmask16) __U, __R);
3257}
3258#else
/* Macro forms of the _mm512_fmadd_round_pd family.  Each forwards its
   arguments to the matching vfmaddpd512 builtin; the plain form uses -1 as
   the mask.  The whole expansion is parenthesized so that an operator
   written after the macro applies to the cast result.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))
3270
/* Macro forms of the _mm512_fmadd_round_ps family.  Each forwards its
   arguments to the matching vfmaddps512 builtin; the plain form uses -1 as
   the mask.  The whole expansion is parenthesized so that an operator
   written after the macro applies to the cast result.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
3282
/* Macro forms of the _mm512_fmsub_round_pd family.  The plain, mask and
   maskz forms negate C and use the vfmaddpd512 builtins; the mask3 form
   uses the separate vfmsubpd512_mask3 builtin with C as given.  The whole
   expansion is parenthesized so that an operator written after the macro
   applies to the cast result.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R))
3294
/* Macro forms of the _mm512_fmsub_round_ps family.  The plain, mask and
   maskz forms negate C and use the vfmaddps512 builtins; the mask3 form
   uses the separate vfmsubps512_mask3 builtin with C as given.  The whole
   expansion is parenthesized so that an operator written after the macro
   applies to the cast result.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R))
3306
/* Macro forms of the _mm512_fmaddsub_round_pd family.  Every form uses a
   vfmaddsubpd512 builtin, matching the inline functions of the same names;
   in particular the mask form must not use the plain vfmaddpd512_mask
   builtin.  The whole expansion is parenthesized so that an operator
   written after the macro applies to the cast result.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))
3318
/* Macro forms of the _mm512_fmaddsub_round_ps family.  Each forwards its
   arguments to the matching vfmaddsubps512 builtin; the plain form uses -1
   as the mask.  The whole expansion is parenthesized so that an operator
   written after the macro applies to the cast result.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3330
/* Macro forms of the _mm512_fmsubadd_round_pd family.  The plain, mask and
   maskz forms negate C and use the vfmaddsubpd512 builtins; the mask3 form
   uses the separate vfmsubaddpd512_mask3 builtin with C as given.  The
   whole expansion is parenthesized so that an operator written after the
   macro applies to the cast result.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))
3342
/* Macro forms of the _mm512_fmsubadd_round_ps family.  The plain, mask and
   maskz forms negate C and use the vfmaddsubps512 builtins; the mask3 form
   uses the separate vfmsubaddps512_mask3 builtin with C as given.  The
   whole expansion is parenthesized so that an operator written after the
   macro applies to the cast result.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3354
/* Macro forms of the _mm512_fnmadd_round_pd family.  The plain, mask3 and
   maskz forms negate A and use the vfmaddpd512 builtins.  The mask form
   uses the separate vfnmaddpd512_mask builtin and therefore passes A
   un-negated, exactly as the inline function of the same name does;
   negating A there as well would hand the builtin the wrong first operand.
   The whole expansion is parenthesized so that an operator written after
   the macro applies to the cast result.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R))
3366
/* Macro forms of the _mm512_fnmadd_round_ps family.  The plain, mask3 and
   maskz forms negate A and use the vfmaddps512 builtins.  The mask form
   uses the separate vfnmaddps512_mask builtin and therefore passes A
   un-negated, exactly as the inline function of the same name does;
   negating A there as well would hand the builtin the wrong first operand.
   The whole expansion is parenthesized so that an operator written after
   the macro applies to the cast result.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R))
3378
/* Macro forms of the _mm512_fnmsub_round_pd family.  The plain and maskz
   forms negate both A and C and use the vfmaddpd512 builtins; the mask and
   mask3 forms use the separate vfnmsubpd512 builtins with the operands as
   given.  The whole expansion is parenthesized so that an operator written
   after the macro applies to the cast result.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R))
3390
/* Macro forms of the _mm512_fnmsub_round_ps family.  The plain and maskz
   forms negate both A and C and use the vfmaddps512 builtins; the mask and
   mask3 forms use the separate vfnmsubps512 builtins with the operands as
   given.  The whole expansion is parenthesized so that an operator written
   after the macro applies to the cast result.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R))
3402#endif
3403
3404extern __inline __m512i
3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406_mm512_abs_epi64 (__m512i __A)
3407{
3408 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409 (__v8di)
0fc245cd 3410 _mm512_undefined_si512 (),
e2098065 3411 (__mmask8) -1);
3412}
3413
3414extern __inline __m512i
3415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3417{
3418 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419 (__v8di) __W,
3420 (__mmask8) __U);
3421}
3422
3423extern __inline __m512i
3424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3426{
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di)
3429 _mm512_setzero_si512 (),
3430 (__mmask8) __U);
3431}
3432
3433extern __inline __m512i
3434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435_mm512_abs_epi32 (__m512i __A)
3436{
3437 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438 (__v16si)
0fc245cd 3439 _mm512_undefined_si512 (),
e2098065 3440 (__mmask16) -1);
3441}
3442
3443extern __inline __m512i
3444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3446{
3447 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448 (__v16si) __W,
3449 (__mmask16) __U);
3450}
3451
3452extern __inline __m512i
3453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3455{
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si)
3458 _mm512_setzero_si512 (),
3459 (__mmask16) __U);
3460}
3461
3462extern __inline __m512
3463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464_mm512_broadcastss_ps (__m128 __A)
3465{
0fc245cd 3466 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467 (__v16sf)
3468 _mm512_undefined_ps (),
e2098065 3469 (__mmask16) -1);
3470}
3471
3472extern __inline __m512
3473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3475{
3476 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477 (__v16sf) __O, __M);
3478}
3479
3480extern __inline __m512
3481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3483{
3484 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485 (__v16sf)
3486 _mm512_setzero_ps (),
3487 __M);
3488}
3489
3490extern __inline __m512d
3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492_mm512_broadcastsd_pd (__m128d __A)
3493{
0fc245cd 3494 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495 (__v8df)
3496 _mm512_undefined_pd (),
e2098065 3497 (__mmask8) -1);
3498}
3499
3500extern __inline __m512d
3501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3503{
3504 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505 (__v8df) __O, __M);
3506}
3507
3508extern __inline __m512d
3509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3511{
3512 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513 (__v8df)
3514 _mm512_setzero_pd (),
3515 __M);
3516}
3517
3518extern __inline __m512i
3519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520_mm512_broadcastd_epi32 (__m128i __A)
3521{
0fc245cd 3522 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523 (__v16si)
3524 _mm512_undefined_si512 (),
e2098065 3525 (__mmask16) -1);
3526}
3527
3528extern __inline __m512i
3529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3531{
3532 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533 (__v16si) __O, __M);
3534}
3535
3536extern __inline __m512i
3537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3539{
3540 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541 (__v16si)
3542 _mm512_setzero_si512 (),
3543 __M);
3544}
3545
3546extern __inline __m512i
3547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548_mm512_set1_epi32 (int __A)
3549{
0fc245cd 3550 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551 (__v16si)
3552 _mm512_undefined_si512 (),
e2098065 3553 (__mmask16)(-1));
3554}
3555
3556extern __inline __m512i
3557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3559{
3560 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561 __M);
3562}
3563
3564extern __inline __m512i
3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3567{
3568 return (__m512i)
3569 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570 (__v16si) _mm512_setzero_si512 (),
3571 __M);
3572}
3573
3574extern __inline __m512i
3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576_mm512_broadcastq_epi64 (__m128i __A)
3577{
0fc245cd 3578 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579 (__v8di)
3580 _mm512_undefined_si512 (),
e2098065 3581 (__mmask8) -1);
3582}
3583
3584extern __inline __m512i
3585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3587{
3588 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589 (__v8di) __O, __M);
3590}
3591
3592extern __inline __m512i
3593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3595{
3596 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597 (__v8di)
3598 _mm512_setzero_si512 (),
3599 __M);
3600}
3601
3602extern __inline __m512i
3603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604_mm512_set1_epi64 (long long __A)
3605{
0fc245cd 3606 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3607 (__v8di)
3608 _mm512_undefined_si512 (),
e2098065 3609 (__mmask8)(-1));
e2098065 3610}
3611
3612extern __inline __m512i
3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3615{
e2098065 3616 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3617 __M);
e2098065 3618}
3619
3620extern __inline __m512i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3623{
e2098065 3624 return (__m512i)
3625 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3626 (__v8di) _mm512_setzero_si512 (),
3627 __M);
e2098065 3628}
3629
3630extern __inline __m512
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm512_broadcast_f32x4 (__m128 __A)
3633{
0fc245cd 3634 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3635 (__v16sf)
3636 _mm512_undefined_ps (),
e2098065 3637 (__mmask16) -1);
3638}
3639
3640extern __inline __m512
3641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3643{
3644 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3645 (__v16sf) __O,
3646 __M);
3647}
3648
3649extern __inline __m512
3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3652{
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf)
3655 _mm512_setzero_ps (),
3656 __M);
3657}
3658
3659extern __inline __m512i
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm512_broadcast_i32x4 (__m128i __A)
3662{
e2098065 3663 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0fc245cd 3664 (__v16si)
3665 _mm512_undefined_si512 (),
e2098065 3666 (__mmask16) -1);
3667}
3668
3669extern __inline __m512i
3670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3672{
3673 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3674 (__v16si) __O,
3675 __M);
3676}
3677
3678extern __inline __m512i
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3681{
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si)
3684 _mm512_setzero_si512 (),
3685 __M);
3686}
3687
3688extern __inline __m512d
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm512_broadcast_f64x4 (__m256d __A)
3691{
e2098065 3692 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0fc245cd 3693 (__v8df)
3694 _mm512_undefined_pd (),
e2098065 3695 (__mmask8) -1);
3696}
3697
3698extern __inline __m512d
3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3701{
3702 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3703 (__v8df) __O,
3704 __M);
3705}
3706
3707extern __inline __m512d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3710{
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df)
3713 _mm512_setzero_pd (),
3714 __M);
3715}
3716
3717extern __inline __m512i
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm512_broadcast_i64x4 (__m256i __A)
3720{
e2098065 3721 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0fc245cd 3722 (__v8di)
3723 _mm512_undefined_si512 (),
e2098065 3724 (__mmask8) -1);
3725}
3726
3727extern __inline __m512i
3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3730{
3731 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3732 (__v8di) __O,
3733 __M);
3734}
3735
3736extern __inline __m512i
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3739{
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di)
3742 _mm512_setzero_si512 (),
3743 __M);
3744}
3745
3746typedef enum
3747{
3748 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3749 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3750 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3751 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3752 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3753 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3754 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3755 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3756 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3757 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3758 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3759 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3760 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3761 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3762 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3763 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3764 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3765 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3766 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3767 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3768 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3769 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3770 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3771 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3772 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3773 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3774 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3775 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3776 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3777 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3778 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3779 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3780 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3781 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3782 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3783 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3784 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3785 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3786 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3787 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3788 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3789 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3790 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3791 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3792 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3793 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3794 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3795 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3796 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3797 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3798 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3799 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3800 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3801 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3802 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3803 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3804 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3805 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3806 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3807 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3808 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3809 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3810 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3811 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3812 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3813 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3814 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3815 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3816 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3817 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3818 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3819 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3820 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3821 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3822 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3823 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3824 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3825 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3826 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3827 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3828 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3829 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3830 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3831 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3832 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3833 _MM_PERM_DDDD = 0xFF
3834} _MM_PERM_ENUM;
3835
3836#ifdef __OPTIMIZE__
3837extern __inline __m512i
3838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3840{
3841 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3842 __mask,
3843 (__v16si)
0fc245cd 3844 _mm512_undefined_si512 (),
e2098065 3845 (__mmask16) -1);
3846}
3847
3848extern __inline __m512i
3849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3851 _MM_PERM_ENUM __mask)
3852{
3853 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3854 __mask,
3855 (__v16si) __W,
3856 (__mmask16) __U);
3857}
3858
3859extern __inline __m512i
3860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3861_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3862{
3863 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3864 __mask,
3865 (__v16si)
3866 _mm512_setzero_si512 (),
3867 (__mmask16) __U);
3868}
3869
3870extern __inline __m512i
3871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3872_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3873{
3874 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3875 (__v8di) __B, __imm,
3876 (__v8di)
0fc245cd 3877 _mm512_undefined_si512 (),
e2098065 3878 (__mmask8) -1);
3879}
3880
3881extern __inline __m512i
3882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3884 __m512i __B, const int __imm)
3885{
3886 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3887 (__v8di) __B, __imm,
3888 (__v8di) __W,
3889 (__mmask8) __U);
3890}
3891
3892extern __inline __m512i
3893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3894_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3895 const int __imm)
3896{
3897 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3898 (__v8di) __B, __imm,
3899 (__v8di)
3900 _mm512_setzero_si512 (),
3901 (__mmask8) __U);
3902}
3903
3904extern __inline __m512i
3905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3907{
3908 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3909 (__v16si) __B,
3910 __imm,
3911 (__v16si)
0fc245cd 3912 _mm512_undefined_si512 (),
e2098065 3913 (__mmask16) -1);
3914}
3915
3916extern __inline __m512i
3917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3919 __m512i __B, const int __imm)
3920{
3921 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3922 (__v16si) __B,
3923 __imm,
3924 (__v16si) __W,
3925 (__mmask16) __U);
3926}
3927
3928extern __inline __m512i
3929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3930_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3931 const int __imm)
3932{
3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934 (__v16si) __B,
3935 __imm,
3936 (__v16si)
3937 _mm512_setzero_si512 (),
3938 (__mmask16) __U);
3939}
3940
3941extern __inline __m512d
3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3944{
3945 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3946 (__v8df) __B, __imm,
3947 (__v8df)
0fc245cd 3948 _mm512_undefined_pd (),
e2098065 3949 (__mmask8) -1);
3950}
3951
3952extern __inline __m512d
3953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3955 __m512d __B, const int __imm)
3956{
3957 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3958 (__v8df) __B, __imm,
3959 (__v8df) __W,
3960 (__mmask8) __U);
3961}
3962
3963extern __inline __m512d
3964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3966 const int __imm)
3967{
3968 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3969 (__v8df) __B, __imm,
3970 (__v8df)
3971 _mm512_setzero_pd (),
3972 (__mmask8) __U);
3973}
3974
3975extern __inline __m512
3976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3978{
3979 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3980 (__v16sf) __B, __imm,
3981 (__v16sf)
0fc245cd 3982 _mm512_undefined_ps (),
e2098065 3983 (__mmask16) -1);
3984}
3985
3986extern __inline __m512
3987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3989 __m512 __B, const int __imm)
3990{
3991 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3992 (__v16sf) __B, __imm,
3993 (__v16sf) __W,
3994 (__mmask16) __U);
3995}
3996
3997extern __inline __m512
3998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4000 const int __imm)
4001{
4002 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4003 (__v16sf) __B, __imm,
4004 (__v16sf)
4005 _mm512_setzero_ps (),
4006 (__mmask16) __U);
4007}
4008
4009#else
/* Macro forms of _mm512_shuffle_epi32 and its mask/maskz variants, used
   when __OPTIMIZE__ is not defined.  Each expands to a single call to
   __builtin_ia32_pshufd512_mask with every macro argument parenthesized
   and cast.  */
#define _mm512_shuffle_epi32(A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i)_mm512_undefined_si512 (), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(SRC, K, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i)(SRC), \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_epi32(K, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(K)))
4024
/* Macro forms of _mm512_shuffle_i64x2 and its mask/maskz variants, used
   when __OPTIMIZE__ is not defined.  Each expands to a single call to
   __builtin_ia32_shuf_i64x2_mask with every macro argument parenthesized
   and cast.  */
#define _mm512_shuffle_i64x2(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i)_mm512_undefined_si512 (), \
     (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(SRC, K, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i)(SRC), \
     (__mmask8)(K)))

#define _mm512_maskz_shuffle_i64x2(K, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(K)))
4042
/* Macro forms of _mm512_shuffle_i32x4 and its mask/maskz variants, used
   when __OPTIMIZE__ is not defined.  Each expands to a single call to
   __builtin_ia32_shuf_i32x4_mask with every macro argument parenthesized
   and cast.  */
#define _mm512_shuffle_i32x4(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i)_mm512_undefined_si512 (), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(SRC, K, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i)(SRC), \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_i32x4(K, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(K)))
4060
/* Macro forms of _mm512_shuffle_f64x2 and its mask/maskz variants, used
   when __OPTIMIZE__ is not defined.  Each expands to a single call to
   __builtin_ia32_shuf_f64x2_mask with every macro argument parenthesized
   and cast.  */
#define _mm512_shuffle_f64x2(A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(SRC, K, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)(SRC), \
     (__mmask8)(K)))

#define _mm512_maskz_shuffle_f64x2(K, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(K)))
4078
/* Macro forms of _mm512_shuffle_f32x4 and its mask/maskz variants, used
   when __OPTIMIZE__ is not defined.  Each expands to a single call to
   __builtin_ia32_shuf_f32x4_mask with every macro argument parenthesized
   and cast.  */
#define _mm512_shuffle_f32x4(A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(SRC, K, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)(SRC), \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_f32x4(K, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(K)))
4096#endif
4097
4098extern __inline __m512i
4099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4101{
4102 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4103 (__v16si) __B,
4104 (__v16si)
0fc245cd 4105 _mm512_undefined_si512 (),
e2098065 4106 (__mmask16) -1);
4107}
4108
4109extern __inline __m512i
4110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4112{
4113 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4114 (__v16si) __B,
4115 (__v16si) __W,
4116 (__mmask16) __U);
4117}
4118
4119extern __inline __m512i
4120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4122{
4123 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4124 (__v16si) __B,
4125 (__v16si)
4126 _mm512_setzero_si512 (),
4127 (__mmask16) __U);
4128}
4129
4130extern __inline __m512i
4131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4132_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4133{
4134 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4135 (__v16si) __B,
4136 (__v16si)
0fc245cd 4137 _mm512_undefined_si512 (),
e2098065 4138 (__mmask16) -1);
4139}
4140
4141extern __inline __m512i
4142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4144{
4145 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4146 (__v16si) __B,
4147 (__v16si) __W,
4148 (__mmask16) __U);
4149}
4150
4151extern __inline __m512i
4152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4154{
4155 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4156 (__v16si) __B,
4157 (__v16si)
4158 _mm512_setzero_si512 (),
4159 (__mmask16) __U);
4160}
4161
4162extern __inline __m512i
4163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4165{
4166 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4167 (__v8di) __B,
4168 (__v8di)
0fc245cd 4169 _mm512_undefined_si512 (),
e2098065 4170 (__mmask8) -1);
4171}
4172
4173extern __inline __m512i
4174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4175_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4176{
4177 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4178 (__v8di) __B,
4179 (__v8di) __W,
4180 (__mmask8) __U);
4181}
4182
4183extern __inline __m512i
4184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4186{
4187 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4188 (__v8di) __B,
4189 (__v8di)
4190 _mm512_setzero_si512 (),
4191 (__mmask8) __U);
4192}
4193
4194extern __inline __m512i
4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4197{
4198 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4199 (__v8di) __B,
4200 (__v8di)
0fc245cd 4201 _mm512_undefined_si512 (),
e2098065 4202 (__mmask8) -1);
4203}
4204
4205extern __inline __m512i
4206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4207_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4208{
4209 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4210 (__v8di) __B,
4211 (__v8di) __W,
4212 (__mmask8) __U);
4213}
4214
4215extern __inline __m512i
4216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4218{
4219 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4220 (__v8di) __B,
4221 (__v8di)
4222 _mm512_setzero_si512 (),
4223 (__mmask8) __U);
4224}
4225
4226#ifdef __OPTIMIZE__
4227extern __inline __m256i
4228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4230{
4231 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4232 (__v8si)
0fc245cd 4233 _mm256_undefined_si256 (),
e2098065 4234 (__mmask8) -1, __R);
4235}
4236
4237extern __inline __m256i
4238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4240 const int __R)
4241{
4242 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4243 (__v8si) __W,
4244 (__mmask8) __U, __R);
4245}
4246
4247extern __inline __m256i
4248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4249_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4250{
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si)
4253 _mm256_setzero_si256 (),
4254 (__mmask8) __U, __R);
4255}
4256
4257extern __inline __m256i
4258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4260{
4261 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4262 (__v8si)
0fc245cd 4263 _mm256_undefined_si256 (),
e2098065 4264 (__mmask8) -1, __R);
4265}
4266
4267extern __inline __m256i
4268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4270 const int __R)
4271{
4272 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4273 (__v8si) __W,
4274 (__mmask8) __U, __R);
4275}
4276
4277extern __inline __m256i
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4280{
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si)
4283 _mm256_setzero_si256 (),
4284 (__mmask8) __U, __R);
4285}
4286#else
/* Macro forms of the _mm512_*cvtt_roundpd_epi32 / _epu32 intrinsics, used
   when __OPTIMIZE__ is not defined.  V, K and R are forwarded to the
   builtin as written; only SRC is cast (to __v8si).  */
#define _mm512_cvtt_roundpd_epi32(V, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvtt_roundpd_epi32(SRC, K, V, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epi32(K, V, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     V, (__v8si)_mm256_setzero_si256(), K, R))

#define _mm512_cvtt_roundpd_epu32(V, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvtt_roundpd_epu32(SRC, K, V, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epu32(K, V, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     V, (__v8si)_mm256_setzero_si256(), K, R))
4304#endif
4305
4306#ifdef __OPTIMIZE__
4307extern __inline __m256i
4308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4310{
4311 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4312 (__v8si)
0fc245cd 4313 _mm256_undefined_si256 (),
e2098065 4314 (__mmask8) -1, __R);
4315}
4316
4317extern __inline __m256i
4318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4320 const int __R)
4321{
4322 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4323 (__v8si) __W,
4324 (__mmask8) __U, __R);
4325}
4326
4327extern __inline __m256i
4328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4329_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4330{
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si)
4333 _mm256_setzero_si256 (),
4334 (__mmask8) __U, __R);
4335}
4336
4337extern __inline __m256i
4338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4340{
4341 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4342 (__v8si)
0fc245cd 4343 _mm256_undefined_si256 (),
e2098065 4344 (__mmask8) -1, __R);
4345}
4346
4347extern __inline __m256i
4348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4350 const int __R)
4351{
4352 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353 (__v8si) __W,
4354 (__mmask8) __U, __R);
4355}
4356
4357extern __inline __m256i
4358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4360{
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si)
4363 _mm256_setzero_si256 (),
4364 (__mmask8) __U, __R);
4365}
4366#else
/* Macro forms of the _mm512_*cvt_roundpd_epi32 / _epu32 intrinsics, used
   when __OPTIMIZE__ is not defined.  V, K and R are forwarded to the
   builtin as written; only SRC is cast (to __v8si).  */
#define _mm512_cvt_roundpd_epi32(V, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvt_roundpd_epi32(SRC, K, V, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epi32(K, V, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     V, (__v8si)_mm256_setzero_si256(), K, R))

#define _mm512_cvt_roundpd_epu32(V, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvt_roundpd_epu32(SRC, K, V, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epu32(K, V, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     V, (__v8si)_mm256_setzero_si256(), K, R))
4384#endif
4385
4386#ifdef __OPTIMIZE__
4387extern __inline __m512i
4388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4390{
4391 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4392 (__v16si)
0fc245cd 4393 _mm512_undefined_si512 (),
e2098065 4394 (__mmask16) -1, __R);
4395}
4396
4397extern __inline __m512i
4398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4400 const int __R)
4401{
4402 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4403 (__v16si) __W,
4404 (__mmask16) __U, __R);
4405}
4406
4407extern __inline __m512i
4408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4410{
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si)
4413 _mm512_setzero_si512 (),
4414 (__mmask16) __U, __R);
4415}
4416
4417extern __inline __m512i
4418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4420{
4421 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4422 (__v16si)
0fc245cd 4423 _mm512_undefined_si512 (),
e2098065 4424 (__mmask16) -1, __R);
4425}
4426
4427extern __inline __m512i
4428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4430 const int __R)
4431{
4432 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4433 (__v16si) __W,
4434 (__mmask16) __U, __R);
4435}
4436
4437extern __inline __m512i
4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4440{
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si)
4443 _mm512_setzero_si512 (),
4444 (__mmask16) __U, __R);
4445}
4446#else
/* Macro forms of the _mm512_*cvtt_roundps_epi32 / _epu32 intrinsics, used
   when __OPTIMIZE__ is not defined.  V, K and R are forwarded to the
   builtin as written; only SRC is cast (to __v16si).  */
#define _mm512_cvtt_roundps_epi32(V, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epi32(SRC, K, V, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epi32(K, V, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     V, (__v16si)_mm512_setzero_si512 (), K, R))

#define _mm512_cvtt_roundps_epu32(V, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epu32(SRC, K, V, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epu32(K, V, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     V, (__v16si)_mm512_setzero_si512 (), K, R))
4464#endif
4465
4466#ifdef __OPTIMIZE__
4467extern __inline __m512i
4468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4470{
4471 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4472 (__v16si)
0fc245cd 4473 _mm512_undefined_si512 (),
e2098065 4474 (__mmask16) -1, __R);
4475}
4476
4477extern __inline __m512i
4478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4480 const int __R)
4481{
4482 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4483 (__v16si) __W,
4484 (__mmask16) __U, __R);
4485}
4486
4487extern __inline __m512i
4488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4490{
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si)
4493 _mm512_setzero_si512 (),
4494 (__mmask16) __U, __R);
4495}
4496
4497extern __inline __m512i
4498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4500{
4501 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4502 (__v16si)
0fc245cd 4503 _mm512_undefined_si512 (),
e2098065 4504 (__mmask16) -1, __R);
4505}
4506
4507extern __inline __m512i
4508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4510 const int __R)
4511{
4512 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4513 (__v16si) __W,
4514 (__mmask16) __U, __R);
4515}
4516
4517extern __inline __m512i
4518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4520{
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si)
4523 _mm512_setzero_si512 (),
4524 (__mmask16) __U, __R);
4525}
4526#else
/* Macro forms of the _mm512_*cvt_roundps_epi32 / _epu32 intrinsics, used
   when __OPTIMIZE__ is not defined.  V, K and R are forwarded to the
   builtin as written; only SRC is cast (to __v16si).  */
#define _mm512_cvt_roundps_epi32(V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epi32(SRC, K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvt_roundps_epi32(K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     V, (__v16si)_mm512_setzero_si512 (), K, R))

#define _mm512_cvt_roundps_epu32(V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epu32(SRC, K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvt_roundps_epu32(K, V, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     V, (__v16si)_mm512_setzero_si512 (), K, R))
4544#endif
4545
4546extern __inline __m128d
4547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4548_mm_cvtu32_sd (__m128d __A, unsigned __B)
4549{
4550 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4551}
4552
4553#ifdef __x86_64__
4554#ifdef __OPTIMIZE__
4555extern __inline __m128d
4556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4558{
4559 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4560}
4561
4562extern __inline __m128d
4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4565{
4566 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4567}
4568
4569extern __inline __m128d
4570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4572{
4573 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4574}
4575#else
/* Macro forms of the 64-bit integer to scalar double conversions, used when
   __OPTIMIZE__ is not defined.  A, B and C are forwarded to the builtin as
   written.  The whole expansion is parenthesized so that the (__m128d)
   cast cannot be captured by a postfix operator (for example a subscript)
   written directly after the macro invocation.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C) \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C) \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
4584#endif
4585
4586#endif
4587
4588#ifdef __OPTIMIZE__
4589extern __inline __m128
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4592{
4593 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4594}
4595
4596extern __inline __m128
4597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4599{
4600 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4601}
4602
4603extern __inline __m128
4604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4606{
4607 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4608}
4609#else
/* Macro forms of the 32-bit integer to scalar float conversions, used when
   __OPTIMIZE__ is not defined.  A, B and C are forwarded to the builtin as
   written.  The whole expansion is parenthesized so that the (__m128) cast
   cannot be captured by a postfix operator (for example a subscript)
   written directly after the macro invocation.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
4618#endif
4619
4620#ifdef __x86_64__
4621#ifdef __OPTIMIZE__
4622extern __inline __m128
4623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4625{
4626 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4627}
4628
4629extern __inline __m128
4630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4632{
4633 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4634}
4635
4636extern __inline __m128
4637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4639{
4640 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4641}
4642#else
/* Macro forms of the 64-bit integer to scalar float conversions, used when
   __OPTIMIZE__ is not defined.  A, B and C are forwarded to the builtin as
   written.  The whole expansion is parenthesized so that the (__m128) cast
   cannot be captured by a postfix operator (for example a subscript)
   written directly after the macro invocation.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C) \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
4651#endif
4652
4653#endif
4654
4655extern __inline __m128i
4656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657_mm512_cvtepi32_epi8 (__m512i __A)
4658{
0fc245cd 4659 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4660 (__v16qi)
4661 _mm_undefined_si128 (),
e2098065 4662 (__mmask16) -1);
4663}
4664
f4a19f2a 4665extern __inline void
4666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4668{
4669 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4670}
4671
e2098065 4672extern __inline __m128i
4673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4675{
4676 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4677 (__v16qi) __O, __M);
4678}
4679
4680extern __inline __m128i
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4683{
4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685 (__v16qi)
4686 _mm_setzero_si128 (),
4687 __M);
4688}
4689
4690extern __inline __m128i
4691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692_mm512_cvtsepi32_epi8 (__m512i __A)
4693{
0fc245cd 4694 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4695 (__v16qi)
4696 _mm_undefined_si128 (),
e2098065 4697 (__mmask16) -1);
4698}
4699
f4a19f2a 4700extern __inline void
4701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4703{
4704 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4705}
4706
e2098065 4707extern __inline __m128i
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4710{
4711 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4712 (__v16qi) __O, __M);
4713}
4714
4715extern __inline __m128i
4716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4718{
4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_setzero_si128 (),
4722 __M);
4723}
4724
4725extern __inline __m128i
4726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727_mm512_cvtusepi32_epi8 (__m512i __A)
4728{
0fc245cd 4729 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4730 (__v16qi)
4731 _mm_undefined_si128 (),
e2098065 4732 (__mmask16) -1);
4733}
4734
f4a19f2a 4735extern __inline void
4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4738{
4739 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4740}
4741
e2098065 4742extern __inline __m128i
4743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4745{
4746 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4747 (__v16qi) __O,
4748 __M);
4749}
4750
4751extern __inline __m128i
4752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4754{
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi)
4757 _mm_setzero_si128 (),
4758 __M);
4759}
4760
4761extern __inline __m256i
4762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763_mm512_cvtepi32_epi16 (__m512i __A)
4764{
0fc245cd 4765 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4766 (__v16hi)
4767 _mm256_undefined_si256 (),
e2098065 4768 (__mmask16) -1);
4769}
4770
f4a19f2a 4771extern __inline void
4772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4774{
4775 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4776}
4777
e2098065 4778extern __inline __m256i
4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4781{
4782 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4783 (__v16hi) __O, __M);
4784}
4785
4786extern __inline __m256i
4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4789{
4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791 (__v16hi)
4792 _mm256_setzero_si256 (),
4793 __M);
4794}
4795
4796extern __inline __m256i
4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798_mm512_cvtsepi32_epi16 (__m512i __A)
4799{
0fc245cd 4800 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4801 (__v16hi)
4802 _mm256_undefined_si256 (),
e2098065 4803 (__mmask16) -1);
4804}
4805
f4a19f2a 4806extern __inline void
4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4809{
4810 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4811}
4812
e2098065 4813extern __inline __m256i
4814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4816{
4817 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4818 (__v16hi) __O, __M);
4819}
4820
4821extern __inline __m256i
4822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4824{
4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_setzero_si256 (),
4828 __M);
4829}
4830
4831extern __inline __m256i
4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833_mm512_cvtusepi32_epi16 (__m512i __A)
4834{
0fc245cd 4835 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4836 (__v16hi)
4837 _mm256_undefined_si256 (),
e2098065 4838 (__mmask16) -1);
4839}
4840
f4a19f2a 4841extern __inline void
4842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4844{
4845 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4846}
4847
e2098065 4848extern __inline __m256i
4849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4851{
4852 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4853 (__v16hi) __O,
4854 __M);
4855}
4856
4857extern __inline __m256i
4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4860{
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi)
4863 _mm256_setzero_si256 (),
4864 __M);
4865}
4866
4867extern __inline __m256i
4868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869_mm512_cvtepi64_epi32 (__m512i __A)
4870{
0fc245cd 4871 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4872 (__v8si)
4873 _mm256_undefined_si256 (),
e2098065 4874 (__mmask8) -1);
4875}
4876
f4a19f2a 4877extern __inline void
4878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4880{
4881 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4882}
4883
e2098065 4884extern __inline __m256i
4885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4887{
4888 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4889 (__v8si) __O, __M);
4890}
4891
4892extern __inline __m256i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4895{
4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897 (__v8si)
4898 _mm256_setzero_si256 (),
4899 __M);
4900}
4901
4902extern __inline __m256i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm512_cvtsepi64_epi32 (__m512i __A)
4905{
4906 __v8si __O;
0fc245cd 4907 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4908 (__v8si)
4909 _mm256_undefined_si256 (),
e2098065 4910 (__mmask8) -1);
4911}
4912
f4a19f2a 4913extern __inline void
4914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4916{
4917 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4918}
4919
e2098065 4920extern __inline __m256i
4921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4922_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4923{
4924 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4925 (__v8si) __O, __M);
4926}
4927
4928extern __inline __m256i
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4931{
4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933 (__v8si)
4934 _mm256_setzero_si256 (),
4935 __M);
4936}
4937
4938extern __inline __m256i
4939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940_mm512_cvtusepi64_epi32 (__m512i __A)
4941{
0fc245cd 4942 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4943 (__v8si)
4944 _mm256_undefined_si256 (),
e2098065 4945 (__mmask8) -1);
4946}
4947
f6c01d8d 4948extern __inline void
f4a19f2a 4949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4951{
4952 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4953}
4954
e2098065 4955extern __inline __m256i
4956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4957_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4958{
4959 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4960 (__v8si) __O, __M);
4961}
4962
4963extern __inline __m256i
4964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4966{
4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968 (__v8si)
4969 _mm256_setzero_si256 (),
4970 __M);
4971}
4972
4973extern __inline __m128i
4974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4975_mm512_cvtepi64_epi16 (__m512i __A)
4976{
0fc245cd 4977 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4978 (__v8hi)
4979 _mm_undefined_si128 (),
e2098065 4980 (__mmask8) -1);
4981}
4982
f4a19f2a 4983extern __inline void
4984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4986{
4987 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4988}
4989
e2098065 4990extern __inline __m128i
4991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4992_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4993{
4994 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4995 (__v8hi) __O, __M);
4996}
4997
4998extern __inline __m128i
4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5001{
5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003 (__v8hi)
5004 _mm_setzero_si128 (),
5005 __M);
5006}
5007
5008extern __inline __m128i
5009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5010_mm512_cvtsepi64_epi16 (__m512i __A)
5011{
0fc245cd 5012 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5013 (__v8hi)
5014 _mm_undefined_si128 (),
e2098065 5015 (__mmask8) -1);
5016}
5017
f4a19f2a 5018extern __inline void
5019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5021{
5022 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5023}
5024
e2098065 5025extern __inline __m128i
5026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5028{
5029 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5030 (__v8hi) __O, __M);
5031}
5032
5033extern __inline __m128i
5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5036{
5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038 (__v8hi)
5039 _mm_setzero_si128 (),
5040 __M);
5041}
5042
5043extern __inline __m128i
5044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045_mm512_cvtusepi64_epi16 (__m512i __A)
5046{
0fc245cd 5047 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5048 (__v8hi)
5049 _mm_undefined_si128 (),
e2098065 5050 (__mmask8) -1);
5051}
5052
f4a19f2a 5053extern __inline void
5054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5056{
5057 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5058}
5059
e2098065 5060extern __inline __m128i
5061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5063{
5064 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5065 (__v8hi) __O, __M);
5066}
5067
5068extern __inline __m128i
5069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5071{
5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073 (__v8hi)
5074 _mm_setzero_si128 (),
5075 __M);
5076}
5077
5078extern __inline __m128i
5079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5080_mm512_cvtepi64_epi8 (__m512i __A)
5081{
0fc245cd 5082 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5083 (__v16qi)
5084 _mm_undefined_si128 (),
e2098065 5085 (__mmask8) -1);
5086}
5087
f4a19f2a 5088extern __inline void
5089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5090_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5091{
5092 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5093}
5094
e2098065 5095extern __inline __m128i
5096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5098{
5099 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5100 (__v16qi) __O, __M);
5101}
5102
5103extern __inline __m128i
5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5106{
5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108 (__v16qi)
5109 _mm_setzero_si128 (),
5110 __M);
5111}
5112
5113extern __inline __m128i
5114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115_mm512_cvtsepi64_epi8 (__m512i __A)
5116{
0fc245cd 5117 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5118 (__v16qi)
5119 _mm_undefined_si128 (),
e2098065 5120 (__mmask8) -1);
5121}
5122
f4a19f2a 5123extern __inline void
5124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5126{
5127 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5128}
5129
e2098065 5130extern __inline __m128i
5131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5133{
5134 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5135 (__v16qi) __O, __M);
5136}
5137
5138extern __inline __m128i
5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5141{
5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143 (__v16qi)
5144 _mm_setzero_si128 (),
5145 __M);
5146}
5147
5148extern __inline __m128i
5149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150_mm512_cvtusepi64_epi8 (__m512i __A)
5151{
0fc245cd 5152 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5153 (__v16qi)
5154 _mm_undefined_si128 (),
e2098065 5155 (__mmask8) -1);
5156}
5157
f4a19f2a 5158extern __inline void
5159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5161{
5162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5163}
5164
e2098065 5165extern __inline __m128i
5166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5167_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5168{
5169 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5170 (__v16qi) __O,
5171 __M);
5172}
5173
5174extern __inline __m128i
5175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5177{
5178 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5179 (__v16qi)
5180 _mm_setzero_si128 (),
5181 __M);
5182}
5183
5184extern __inline __m512d
5185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186_mm512_cvtepi32_pd (__m256i __A)
5187{
5188 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5189 (__v8df)
0fc245cd 5190 _mm512_undefined_pd (),
e2098065 5191 (__mmask8) -1);
5192}
5193
5194extern __inline __m512d
5195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5197{
5198 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5199 (__v8df) __W,
5200 (__mmask8) __U);
5201}
5202
5203extern __inline __m512d
5204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5206{
5207 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5208 (__v8df)
5209 _mm512_setzero_pd (),
5210 (__mmask8) __U);
5211}
5212
5213extern __inline __m512d
5214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215_mm512_cvtepu32_pd (__m256i __A)
5216{
5217 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5218 (__v8df)
0fc245cd 5219 _mm512_undefined_pd (),
e2098065 5220 (__mmask8) -1);
5221}
5222
5223extern __inline __m512d
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5226{
5227 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5228 (__v8df) __W,
5229 (__mmask8) __U);
5230}
5231
5232extern __inline __m512d
5233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5235{
5236 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5237 (__v8df)
5238 _mm512_setzero_pd (),
5239 (__mmask8) __U);
5240}
5241
5242#ifdef __OPTIMIZE__
5243extern __inline __m512
5244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5246{
5247 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5248 (__v16sf)
0fc245cd 5249 _mm512_undefined_ps (),
e2098065 5250 (__mmask16) -1, __R);
5251}
5252
5253extern __inline __m512
5254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5255_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5256 const int __R)
5257{
5258 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5259 (__v16sf) __W,
5260 (__mmask16) __U, __R);
5261}
5262
5263extern __inline __m512
5264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5265_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5266{
5267 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5268 (__v16sf)
5269 _mm512_setzero_ps (),
5270 (__mmask16) __U, __R);
5271}
5272
5273extern __inline __m512
5274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5276{
5277 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5278 (__v16sf)
0fc245cd 5279 _mm512_undefined_ps (),
e2098065 5280 (__mmask16) -1, __R);
5281}
5282
5283extern __inline __m512
5284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5286 const int __R)
5287{
5288 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5289 (__v16sf) __W,
5290 (__mmask16) __U, __R);
5291}
5292
5293extern __inline __m512
5294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5296{
5297 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5298 (__v16sf)
5299 _mm512_setzero_ps (),
5300 (__mmask16) __U, __R);
5301}
5302
5303#else
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: undefined second operand, all-ones __mmask16, rounding
   selector B.  The expansion is fully parenthesized.  */
#define _mm512_cvt_roundepi32_ps(A, B)                                  \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),           \
                                               (__v16sf)                \
                                               _mm512_undefined_ps (),  \
                                               (__mmask16) -1, (B)))
e2098065 5306
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: W is cast to __v16sf, U to __mmask16, and the expansion
   is fully parenthesized.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)                       \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),           \
                                               (__v16sf) (W),           \
                                               (__mmask16) (U), (B)))
5309
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: zero vector as second operand, U cast to __mmask16, and
   the expansion is fully parenthesized.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)                         \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),           \
                                               (__v16sf)                \
                                               _mm512_setzero_ps (),    \
                                               (__mmask16) (U), (B)))
5312
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: undefined second operand, all-ones __mmask16, rounding
   selector B.  The expansion is fully parenthesized.  */
#define _mm512_cvt_roundepu32_ps(A, B)                                  \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),          \
                                                (__v16sf)               \
                                                _mm512_undefined_ps (), \
                                                (__mmask16) -1, (B)))
e2098065 5315
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: W is cast to __v16sf, U to __mmask16, and the expansion
   is fully parenthesized.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)                       \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),          \
                                                (__v16sf) (W),          \
                                                (__mmask16) (U), (B)))
5318
/* Macro form used when __OPTIMIZE__ is not defined.  Mirrors the inline
   definition: zero vector as second operand, U cast to __mmask16, and
   the expansion is fully parenthesized.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)                         \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),          \
                                                (__v16sf)               \
                                                _mm512_setzero_ps (),   \
                                                (__mmask16) (U), (B)))
5321#endif
5322
5323#ifdef __OPTIMIZE__
5324extern __inline __m256d
5325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5327{
5328 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5329 __imm,
5330 (__v4df)
0fc245cd 5331 _mm256_undefined_pd (),
e2098065 5332 (__mmask8) -1);
5333}
5334
5335extern __inline __m256d
5336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5338 const int __imm)
5339{
5340 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5341 __imm,
5342 (__v4df) __W,
5343 (__mmask8) __U);
5344}
5345
5346extern __inline __m256d
5347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5349{
5350 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5351 __imm,
5352 (__v4df)
5353 _mm256_setzero_pd (),
5354 (__mmask8) __U);
5355}
5356
5357extern __inline __m128
5358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5360{
5361 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5362 __imm,
5363 (__v4sf)
0fc245cd 5364 _mm_undefined_ps (),
e2098065 5365 (__mmask8) -1);
5366}
5367
5368extern __inline __m128
5369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5371 const int __imm)
5372{
5373 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5374 __imm,
5375 (__v4sf) __W,
5376 (__mmask8) __U);
5377}
5378
5379extern __inline __m128
5380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5382{
5383 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5384 __imm,
5385 (__v4sf)
5386 _mm_setzero_ps (),
5387 (__mmask8) __U);
5388}
5389
5390extern __inline __m256i
5391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5393{
5394 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5395 __imm,
5396 (__v4di)
0fc245cd 5397 _mm256_undefined_si256 (),
e2098065 5398 (__mmask8) -1);
5399}
5400
5401extern __inline __m256i
5402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5404 const int __imm)
5405{
5406 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5407 __imm,
5408 (__v4di) __W,
5409 (__mmask8) __U);
5410}
5411
5412extern __inline __m256i
5413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5415{
5416 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5417 __imm,
5418 (__v4di)
5419 _mm256_setzero_si256 (),
5420 (__mmask8) __U);
5421}
5422
5423extern __inline __m128i
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5426{
5427 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5428 __imm,
5429 (__v4si)
0fc245cd 5430 _mm_undefined_si128 (),
e2098065 5431 (__mmask8) -1);
5432}
5433
5434extern __inline __m128i
5435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5436_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5437 const int __imm)
5438{
5439 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5440 __imm,
5441 (__v4si) __W,
5442 (__mmask8) __U);
5443}
5444
5445extern __inline __m128i
5446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5448{
5449 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5450 __imm,
5451 (__v4si)
5452 _mm_setzero_si128 (),
5453 (__mmask8) __U);
5454}
5455#else
5456
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4 builtin
   with an undefined third operand and an all-ones mask.  */
#define _mm512_extractf64x4_pd(X, C)                                    \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),  \
                                     (__v4df)(__m256d)                  \
                                     _mm256_undefined_pd(),             \
                                     (__mmask8)-1))
5462
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4 builtin
   with W as the third operand and U as the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),  \
                                     (__v4df)(__m256d)(W),              \
                                     (__mmask8)(U)))
5468
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4 builtin
   with a zero vector as the third operand and U as the mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
  ((__m256d)                                                            \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),  \
                                     (__v4df)(__m256d)                  \
                                     _mm256_setzero_pd(),               \
                                     (__mmask8)(U)))
5474
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4 builtin
   with an undefined third operand and an all-ones mask.  */
#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),  \
                                     (__v4sf)(__m128)                   \
                                     _mm_undefined_ps(),                \
                                     (__mmask8)-1))
5480
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4 builtin
   with W as the third operand and U as the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),  \
                                     (__v4sf)(__m128)(W),               \
                                     (__mmask8)(U)))
5486
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4 builtin
   with a zero vector as the third operand and U as the mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128)                                                             \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),  \
                                     (__v4sf)(__m128)                   \
                                     _mm_setzero_ps(),                  \
                                     (__mmask8)(U)))
5492
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4 builtin
   with an undefined third operand and an all-ones mask.  */
#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),  \
                                     (__v4di)(__m256i)                  \
                                     _mm256_undefined_si256 (),         \
                                     (__mmask8)-1))
5498
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4 builtin
   with W as the third operand and U as the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),  \
                                     (__v4di)(__m256i)(W),              \
                                     (__mmask8)(U)))
5504
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4 builtin
   with a zero vector as the third operand and U as the mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i)                                                            \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),  \
                                     (__v4di)(__m256i)                  \
                                     _mm256_setzero_si256 (),           \
                                     (__mmask8)(U)))
5510
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4 builtin
   with an undefined third operand and an all-ones mask.  */
#define _mm512_extracti32x4_epi32(X, C)                                 \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C), \
                                     (__v4si)(__m128i)                  \
                                     _mm_undefined_si128 (),            \
                                     (__mmask8)-1))
5516
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4 builtin
   with W as the third operand and U as the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C), \
                                     (__v4si)(__m128i)(W),              \
                                     (__mmask8)(U)))
5522
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4 builtin
   with a zero vector as the third operand and U as the mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
  ((__m128i)                                                            \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C), \
                                     (__v4si)(__m128i)                  \
                                     _mm_setzero_si128 (),              \
                                     (__mmask8)(U)))
5528#endif
5529
5530#ifdef __OPTIMIZE__
5531extern __inline __m512i
5532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5534{
5535 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5536 (__v4si) __B,
5537 __imm,
5538 (__v16si) __A, -1);
5539}
5540
5541extern __inline __m512
5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5544{
5545 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5546 (__v4sf) __B,
5547 __imm,
5548 (__v16sf) __A, -1);
5549}
5550
5551extern __inline __m512i
5552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5554{
5555 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5556 (__v4di) __B,
5557 __imm,
5558 (__v8di)
0fc245cd 5559 _mm512_undefined_si512 (),
e2098065 5560 (__mmask8) -1);
5561}
5562
5563extern __inline __m512i
5564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5565_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5566 __m256i __B, const int __imm)
5567{
5568 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5569 (__v4di) __B,
5570 __imm,
5571 (__v8di) __W,
5572 (__mmask8) __U);
5573}
5574
5575extern __inline __m512i
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5578 const int __imm)
5579{
5580 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5581 (__v4di) __B,
5582 __imm,
5583 (__v8di)
5584 _mm512_setzero_si512 (),
5585 (__mmask8) __U);
5586}
5587
5588extern __inline __m512d
5589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5591{
5592 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5593 (__v4df) __B,
5594 __imm,
5595 (__v8df)
0fc245cd 5596 _mm512_undefined_pd (),
e2098065 5597 (__mmask8) -1);
5598}
5599
5600extern __inline __m512d
5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5603 __m256d __B, const int __imm)
5604{
5605 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5606 (__v4df) __B,
5607 __imm,
5608 (__v8df) __W,
5609 (__mmask8) __U);
5610}
5611
5612extern __inline __m512d
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5615 const int __imm)
5616{
5617 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5618 (__v4df) __B,
5619 __imm,
5620 (__v8df)
5621 _mm512_setzero_pd (),
5622 (__mmask8) __U);
5623}
5624#else
/* Macro form used when __OPTIMIZE__ is not defined.  X feeds both the
   first and the fourth builtin operand, so it is captured once in a
   local inside a statement expression; the argument is then evaluated
   a single time, as in the inline definition.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  (__extension__                                                        \
   ({                                                                   \
     __v16sf __ins_base = (__v16sf)(__m512) (X);                        \
     (__m512) __builtin_ia32_insertf32x4_mask (__ins_base,              \
                                               (__v4sf)(__m128) (Y),    \
                                               (int) (C),               \
                                               __ins_base,              \
                                               (__mmask16)(-1));        \
   }))
5628
/* Macro form used when __OPTIMIZE__ is not defined.  X feeds both the
   first and the fourth builtin operand, so it is captured once in a
   local inside a statement expression; the argument is then evaluated
   a single time, as in the inline definition.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  (__extension__                                                        \
   ({                                                                   \
     __v16si __ins_base = (__v16si)(__m512i) (X);                       \
     (__m512i) __builtin_ia32_inserti32x4_mask (__ins_base,             \
                                                (__v4si)(__m128i) (Y),  \
                                                (int) (C),              \
                                                __ins_base,             \
                                                (__mmask16)(-1));       \
   }))
5632
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4 builtin
   with an undefined fourth operand and an all-ones mask.  */
#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y), (int) (C),   \
                                    (__v8df)(__m512d)                   \
                                    _mm512_undefined_pd(),              \
                                    (__mmask8)-1))
5638
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4 builtin
   with W as the fourth operand and U as the mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y), (int) (C),   \
                                    (__v8df)(__m512d)(W),               \
                                    (__mmask8)(U)))
5644
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4 builtin
   with a zero vector as the fourth operand and U as the mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y), (int) (C),   \
                                    (__v8df)(__m512d)                   \
                                    _mm512_setzero_pd(),                \
                                    (__mmask8)(U)))
5650
/* Macro form used when __OPTIMIZE__ is not defined: inserti64x4 builtin
   with an undefined fourth operand and an all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C)                                     \
  ((__m512i)                                                            \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),              \
                                    (__v4di)(__m256i) (Y), (int) (C),   \
                                    (__v8di)(__m512i)                   \
                                    _mm512_undefined_si512 (),          \
                                    (__mmask8)-1))
5656
/* Macro form: __builtin_ia32_inserti64x4_mask on X, Y and C with W as the
   fourth operand and U as the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
5662
/* Macro form: __builtin_ia32_inserti64x4_mask on X, Y and C with an
   all-zero vector as the fourth operand and U as the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
5668#endif
5669
5670extern __inline __m512d
5671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5672_mm512_loadu_pd (void const *__P)
5673{
5674 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5675 (__v8df)
0fc245cd 5676 _mm512_undefined_pd (),
e2098065 5677 (__mmask8) -1);
5678}
5679
5680extern __inline __m512d
5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5683{
5684 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5685 (__v8df) __W,
5686 (__mmask8) __U);
5687}
5688
5689extern __inline __m512d
5690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5692{
5693 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5694 (__v8df)
5695 _mm512_setzero_pd (),
5696 (__mmask8) __U);
5697}
5698
5699extern __inline void
5700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5701_mm512_storeu_pd (void *__P, __m512d __A)
5702{
5703 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5704 (__mmask8) -1);
5705}
5706
5707extern __inline void
5708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5709_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5710{
5711 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5712 (__mmask8) __U);
5713}
5714
5715extern __inline __m512
5716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717_mm512_loadu_ps (void const *__P)
5718{
5719 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5720 (__v16sf)
0fc245cd 5721 _mm512_undefined_ps (),
e2098065 5722 (__mmask16) -1);
5723}
5724
5725extern __inline __m512
5726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5728{
5729 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5730 (__v16sf) __W,
5731 (__mmask16) __U);
5732}
5733
5734extern __inline __m512
5735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5737{
5738 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5739 (__v16sf)
5740 _mm512_setzero_ps (),
5741 (__mmask16) __U);
5742}
5743
5744extern __inline void
5745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746_mm512_storeu_ps (void *__P, __m512 __A)
5747{
5748 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5749 (__mmask16) -1);
5750}
5751
5752extern __inline void
5753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755{
5756 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5757 (__mmask16) __U);
5758}
5759
5760extern __inline __m512i
5761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763{
5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5765 (__v8di) __W,
5766 (__mmask8) __U);
5767}
5768
5769extern __inline __m512i
5770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772{
5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5774 (__v8di)
5775 _mm512_setzero_si512 (),
5776 (__mmask8) __U);
5777}
5778
5779extern __inline void
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782{
5783 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5784 (__mmask8) __U);
5785}
5786
5787extern __inline __m512i
5788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23afdab7 5789_mm512_loadu_si512 (void const *__P)
e2098065 5790{
5791 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5792 (__v16si)
5793 _mm512_setzero_si512 (),
5794 (__mmask16) -1);
5795}
5796
5797extern __inline __m512i
5798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5800{
5801 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5802 (__v16si) __W,
5803 (__mmask16) __U);
5804}
5805
5806extern __inline __m512i
5807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5809{
5810 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5811 (__v16si)
5812 _mm512_setzero_si512 (),
5813 (__mmask16) __U);
5814}
5815
5816extern __inline void
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23afdab7 5818_mm512_storeu_si512 (void *__P, __m512i __A)
e2098065 5819{
5820 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5821 (__mmask16) -1);
5822}
5823
5824extern __inline void
5825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5826_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5827{
5828 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5829 (__mmask16) __U);
5830}
5831
5832extern __inline __m512d
5833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834_mm512_permutevar_pd (__m512d __A, __m512i __C)
5835{
5836 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5837 (__v8di) __C,
5838 (__v8df)
0fc245cd 5839 _mm512_undefined_pd (),
e2098065 5840 (__mmask8) -1);
5841}
5842
5843extern __inline __m512d
5844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5846{
5847 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5848 (__v8di) __C,
5849 (__v8df) __W,
5850 (__mmask8) __U);
5851}
5852
5853extern __inline __m512d
5854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5855_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5856{
5857 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5858 (__v8di) __C,
5859 (__v8df)
5860 _mm512_setzero_pd (),
5861 (__mmask8) __U);
5862}
5863
5864extern __inline __m512
5865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5866_mm512_permutevar_ps (__m512 __A, __m512i __C)
5867{
5868 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5869 (__v16si) __C,
5870 (__v16sf)
0fc245cd 5871 _mm512_undefined_ps (),
e2098065 5872 (__mmask16) -1);
5873}
5874
5875extern __inline __m512
5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5878{
5879 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5880 (__v16si) __C,
5881 (__v16sf) __W,
5882 (__mmask16) __U);
5883}
5884
5885extern __inline __m512
5886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5887_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5888{
5889 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5890 (__v16si) __C,
5891 (__v16sf)
5892 _mm512_setzero_ps (),
5893 (__mmask16) __U);
5894}
5895
5896extern __inline __m512i
5897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5898_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5899{
5900 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5901 /* idx */ ,
5902 (__v8di) __A,
5903 (__v8di) __B,
5904 (__mmask8) -1);
5905}
5906
5907extern __inline __m512i
5908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5909_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5910 __m512i __B)
5911{
5912 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913 /* idx */ ,
5914 (__v8di) __A,
5915 (__v8di) __B,
5916 (__mmask8) __U);
5917}
5918
5919extern __inline __m512i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5922 __mmask8 __U, __m512i __B)
5923{
5924 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5925 (__v8di) __I
5926 /* idx */ ,
5927 (__v8di) __B,
5928 (__mmask8) __U);
5929}
5930
5931extern __inline __m512i
5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5934 __m512i __I, __m512i __B)
5935{
5936 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5937 /* idx */ ,
5938 (__v8di) __A,
5939 (__v8di) __B,
5940 (__mmask8) __U);
5941}
5942
5943extern __inline __m512i
5944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5946{
5947 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5948 /* idx */ ,
5949 (__v16si) __A,
5950 (__v16si) __B,
5951 (__mmask16) -1);
5952}
5953
5954extern __inline __m512i
5955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5956_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5957 __m512i __I, __m512i __B)
5958{
5959 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960 /* idx */ ,
5961 (__v16si) __A,
5962 (__v16si) __B,
5963 (__mmask16) __U);
5964}
5965
5966extern __inline __m512i
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5969 __mmask16 __U, __m512i __B)
5970{
5971 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5972 (__v16si) __I
5973 /* idx */ ,
5974 (__v16si) __B,
5975 (__mmask16) __U);
5976}
5977
5978extern __inline __m512i
5979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5981 __m512i __I, __m512i __B)
5982{
5983 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5984 /* idx */ ,
5985 (__v16si) __A,
5986 (__v16si) __B,
5987 (__mmask16) __U);
5988}
5989
5990extern __inline __m512d
5991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5993{
5994 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5995 /* idx */ ,
5996 (__v8df) __A,
5997 (__v8df) __B,
5998 (__mmask8) -1);
5999}
6000
6001extern __inline __m512d
6002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6003_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6004 __m512d __B)
6005{
6006 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007 /* idx */ ,
6008 (__v8df) __A,
6009 (__v8df) __B,
6010 (__mmask8) __U);
6011}
6012
6013extern __inline __m512d
6014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6016 __m512d __B)
6017{
6018 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6019 (__v8di) __I
6020 /* idx */ ,
6021 (__v8df) __B,
6022 (__mmask8) __U);
6023}
6024
6025extern __inline __m512d
6026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6028 __m512d __B)
6029{
6030 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6031 /* idx */ ,
6032 (__v8df) __A,
6033 (__v8df) __B,
6034 (__mmask8) __U);
6035}
6036
6037extern __inline __m512
6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6040{
6041 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6042 /* idx */ ,
6043 (__v16sf) __A,
6044 (__v16sf) __B,
6045 (__mmask16) -1);
6046}
6047
6048extern __inline __m512
6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6051{
6052 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6053 /* idx */ ,
6054 (__v16sf) __A,
6055 (__v16sf) __B,
6056 (__mmask16) __U);
6057}
6058
6059extern __inline __m512
6060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6062 __m512 __B)
6063{
6064 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6065 (__v16si) __I
6066 /* idx */ ,
6067 (__v16sf) __B,
6068 (__mmask16) __U);
6069}
6070
6071extern __inline __m512
6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6074 __m512 __B)
6075{
6076 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6077 /* idx */ ,
6078 (__v16sf) __A,
6079 (__v16sf) __B,
6080 (__mmask16) __U);
6081}
6082
6083#ifdef __OPTIMIZE__
6084extern __inline __m512d
6085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6086_mm512_permute_pd (__m512d __X, const int __C)
6087{
6088 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6089 (__v8df)
0fc245cd 6090 _mm512_undefined_pd (),
e2098065 6091 (__mmask8) -1);
6092}
6093
6094extern __inline __m512d
6095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6097{
6098 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6099 (__v8df) __W,
6100 (__mmask8) __U);
6101}
6102
6103extern __inline __m512d
6104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6106{
6107 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6108 (__v8df)
6109 _mm512_setzero_pd (),
6110 (__mmask8) __U);
6111}
6112
6113extern __inline __m512
6114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115_mm512_permute_ps (__m512 __X, const int __C)
6116{
6117 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6118 (__v16sf)
0fc245cd 6119 _mm512_undefined_ps (),
e2098065 6120 (__mmask16) -1);
6121}
6122
6123extern __inline __m512
6124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6126{
6127 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6128 (__v16sf) __W,
6129 (__mmask16) __U);
6130}
6131
6132extern __inline __m512
6133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6134_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6135{
6136 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6137 (__v16sf)
6138 _mm512_setzero_ps (),
6139 (__mmask16) __U);
6140}
6141#else
/* Macro form: __builtin_ia32_vpermilpd512_mask on X and C with an undefined
   vector as the third operand and an all-ones mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)(-1)))
6146
/* Macro form: __builtin_ia32_vpermilpd512_mask on X and C with W as the
   third operand and U as the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6151
/* Macro form: __builtin_ia32_vpermilpd512_mask on X and C with an all-zero
   vector as the third operand and U as the mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6156
/* Macro form: __builtin_ia32_vpermilps512_mask on X and C with an undefined
   vector as the third operand and an all-ones mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)(-1)))
6161
/* Macro form: __builtin_ia32_vpermilps512_mask on X and C with W as the
   third operand and U as the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6166
/* Macro form: __builtin_ia32_vpermilps512_mask on X and C with an all-zero
   vector as the third operand and U as the mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6171#endif
6172
6173#ifdef __OPTIMIZE__
6174extern __inline __m512i
6175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176_mm512_permutex_epi64 (__m512i __X, const int __I)
6177{
6178 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6179 (__v8di)
0fc245cd 6180 _mm512_undefined_si512 (),
e2098065 6181 (__mmask8) (-1));
6182}
6183
6184extern __inline __m512i
6185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6186_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6187 __m512i __X, const int __I)
6188{
6189 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6190 (__v8di) __W,
6191 (__mmask8) __M);
6192}
6193
6194extern __inline __m512i
6195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6197{
6198 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6199 (__v8di)
6200 _mm512_setzero_si512 (),
6201 (__mmask8) __M);
6202}
6203
6204extern __inline __m512d
6205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6206_mm512_permutex_pd (__m512d __X, const int __M)
6207{
6208 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6209 (__v8df)
0fc245cd 6210 _mm512_undefined_pd (),
e2098065 6211 (__mmask8) -1);
6212}
6213
6214extern __inline __m512d
6215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6217{
6218 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6219 (__v8df) __W,
6220 (__mmask8) __U);
6221}
6222
6223extern __inline __m512d
6224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6226{
6227 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6228 (__v8df)
6229 _mm512_setzero_pd (),
6230 (__mmask8) __U);
6231}
6232#else
/* Macro form: __builtin_ia32_permdf512_mask on X and M with an undefined
   vector as the third operand and an all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
e2098065 6237
/* Macro form: __builtin_ia32_permdf512_mask on X and M with W as the third
   operand and U as the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6241
/* Macro form: __builtin_ia32_permdf512_mask on X and M with an all-zero
   vector as the third operand and U as the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6246
/* Macro form: __builtin_ia32_permdi512_mask on X and I with an undefined
   vector as the third operand and an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_undefined_si512 ()), \
     (__mmask8)(-1)))
6253
/* Macro form: __builtin_ia32_permdi512_mask on X and I with an all-zero
   vector as the third operand and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8)(M)))
6260
/* Macro form: __builtin_ia32_permdi512_mask on X and I with W as the third
   operand and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(M)))
6266#endif
6267
6268extern __inline __m512i
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6271{
389b04c1 6272 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6273 (__v8di) __X,
e2098065 6274 (__v8di)
6275 _mm512_setzero_si512 (),
6276 __M);
6277}
6278
6279extern __inline __m512i
6280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6282{
389b04c1 6283 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6284 (__v8di) __X,
e2098065 6285 (__v8di)
0fc245cd 6286 _mm512_undefined_si512 (),
e2098065 6287 (__mmask8) -1);
6288}
6289
6290extern __inline __m512i
6291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6292_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6293 __m512i __Y)
6294{
389b04c1 6295 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296 (__v8di) __X,
e2098065 6297 (__v8di) __W,
6298 __M);
6299}
6300
6301extern __inline __m512i
6302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6303_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6304{
389b04c1 6305 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6306 (__v16si) __X,
e2098065 6307 (__v16si)
6308 _mm512_setzero_si512 (),
6309 __M);
6310}
6311
6312extern __inline __m512i
6313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6315{
389b04c1 6316 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6317 (__v16si) __X,
e2098065 6318 (__v16si)
0fc245cd 6319 _mm512_undefined_si512 (),
e2098065 6320 (__mmask16) -1);
6321}
6322
6323extern __inline __m512i
6324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6326 __m512i __Y)
6327{
389b04c1 6328 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329 (__v16si) __X,
e2098065 6330 (__v16si) __W,
6331 __M);
6332}
6333
6334extern __inline __m512d
6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6337{
6338 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6339 (__v8di) __X,
6340 (__v8df)
0fc245cd 6341 _mm512_undefined_pd (),
e2098065 6342 (__mmask8) -1);
6343}
6344
6345extern __inline __m512d
6346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6348{
6349 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6350 (__v8di) __X,
6351 (__v8df) __W,
6352 (__mmask8) __U);
6353}
6354
6355extern __inline __m512d
6356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6358{
6359 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6360 (__v8di) __X,
6361 (__v8df)
6362 _mm512_setzero_pd (),
6363 (__mmask8) __U);
6364}
6365
6366extern __inline __m512
6367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6368_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6369{
6370 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6371 (__v16si) __X,
6372 (__v16sf)
0fc245cd 6373 _mm512_undefined_ps (),
e2098065 6374 (__mmask16) -1);
6375}
6376
6377extern __inline __m512
6378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6379_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6380{
6381 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6382 (__v16si) __X,
6383 (__v16sf) __W,
6384 (__mmask16) __U);
6385}
6386
6387extern __inline __m512
6388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6390{
6391 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6392 (__v16si) __X,
6393 (__v16sf)
6394 _mm512_setzero_ps (),
6395 (__mmask16) __U);
6396}
6397
6398#ifdef __OPTIMIZE__
6399extern __inline __m512
6400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6402{
6403 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6404 (__v16sf) __V, __imm,
6405 (__v16sf)
0fc245cd 6406 _mm512_undefined_ps (),
e2098065 6407 (__mmask16) -1);
6408}
6409
6410extern __inline __m512
6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6413 __m512 __V, const int __imm)
6414{
6415 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416 (__v16sf) __V, __imm,
6417 (__v16sf) __W,
6418 (__mmask16) __U);
6419}
6420
6421extern __inline __m512
6422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6424{
6425 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6426 (__v16sf) __V, __imm,
6427 (__v16sf)
6428 _mm512_setzero_ps (),
6429 (__mmask16) __U);
6430}
6431
6432extern __inline __m512d
6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6435{
6436 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6437 (__v8df) __V, __imm,
6438 (__v8df)
0fc245cd 6439 _mm512_undefined_pd (),
e2098065 6440 (__mmask8) -1);
6441}
6442
6443extern __inline __m512d
6444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6446 __m512d __V, const int __imm)
6447{
6448 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449 (__v8df) __V, __imm,
6450 (__v8df) __W,
6451 (__mmask8) __U);
6452}
6453
6454extern __inline __m512d
6455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6457 const int __imm)
6458{
6459 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6460 (__v8df) __V, __imm,
6461 (__v8df)
6462 _mm512_setzero_pd (),
6463 (__mmask8) __U);
6464}
6465
6466extern __inline __m512d
6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6469 const int __imm, const int __R)
6470{
6471 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6472 (__v8df) __B,
6473 (__v8di) __C,
6474 __imm,
6475 (__mmask8) -1, __R);
6476}
6477
6478extern __inline __m512d
6479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6481 __m512i __C, const int __imm, const int __R)
6482{
6483 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484 (__v8df) __B,
6485 (__v8di) __C,
6486 __imm,
6487 (__mmask8) __U, __R);
6488}
6489
6490extern __inline __m512d
6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6493 __m512i __C, const int __imm, const int __R)
6494{
6495 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6496 (__v8df) __B,
6497 (__v8di) __C,
6498 __imm,
6499 (__mmask8) __U, __R);
6500}
6501
6502extern __inline __m512
6503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6505 const int __imm, const int __R)
6506{
6507 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6508 (__v16sf) __B,
6509 (__v16si) __C,
6510 __imm,
6511 (__mmask16) -1, __R);
6512}
6513
6514extern __inline __m512
6515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6517 __m512i __C, const int __imm, const int __R)
6518{
6519 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520 (__v16sf) __B,
6521 (__v16si) __C,
6522 __imm,
6523 (__mmask16) __U, __R);
6524}
6525
6526extern __inline __m512
6527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6529 __m512i __C, const int __imm, const int __R)
6530{
6531 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6532 (__v16sf) __B,
6533 (__v16si) __C,
6534 __imm,
6535 (__mmask16) __U, __R);
6536}
6537
6538extern __inline __m128d
6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6541 const int __imm, const int __R)
6542{
6543 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6544 (__v2df) __B,
6545 (__v2di) __C, __imm,
6546 (__mmask8) -1, __R);
6547}
6548
6549extern __inline __m128d
6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6552 __m128i __C, const int __imm, const int __R)
6553{
6554 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6555 (__v2df) __B,
6556 (__v2di) __C, __imm,
6557 (__mmask8) __U, __R);
6558}
6559
6560extern __inline __m128d
6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6563 __m128i __C, const int __imm, const int __R)
6564{
6565 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6566 (__v2df) __B,
6567 (__v2di) __C,
6568 __imm,
6569 (__mmask8) __U, __R);
6570}
6571
6572extern __inline __m128
6573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6575 const int __imm, const int __R)
6576{
6577 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6578 (__v4sf) __B,
6579 (__v4si) __C, __imm,
6580 (__mmask8) -1, __R);
6581}
6582
6583extern __inline __m128
6584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6586 __m128i __C, const int __imm, const int __R)
6587{
6588 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6589 (__v4sf) __B,
6590 (__v4si) __C, __imm,
6591 (__mmask8) __U, __R);
6592}
6593
6594extern __inline __m128
6595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6597 __m128i __C, const int __imm, const int __R)
6598{
6599 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6600 (__v4sf) __B,
6601 (__v4si) __C, __imm,
6602 (__mmask8) __U, __R);
6603}
6604
6605#else
/* Macro form: __builtin_ia32_shufpd512_mask on X, Y and C with an undefined
   vector as the fourth operand and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6611
/* Macro form: __builtin_ia32_shufpd512_mask on X, Y and C with W as the
   fourth operand and U as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6617
/* Macro form: __builtin_ia32_shufpd512_mask on X, Y and C with an all-zero
   vector as the fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6623
/* Macro form: __builtin_ia32_shufps512_mask on X, Y and C with an undefined
   vector as the fourth operand and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))
6629
/* Macro form: __builtin_ia32_shufps512_mask on X, Y and C with W as the
   fourth operand and U as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6635
/* Macro form: __builtin_ia32_shufps512_mask on X, Y and C with an all-zero
   vector as the fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6641
/* Macro forms of the 512-bit double-precision fixupimm with explicit
   rounding argument R.  The unmasked variant uses a mask of -1.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))

/* Same builtin as above with the caller's mask U.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))

/* Dispatches to the _maskz builtin.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6656
/* Macro forms of the 512-bit single-precision fixupimm with explicit
   rounding argument R.  The unmasked variant uses a mask of -1.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     (R)))

/* Same builtin as above with the caller's mask U.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     (R)))

/* Dispatches to the _maskz builtin.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     (R)))
6671
/* Macro forms of the 128-bit double-precision fixupimm with explicit
   rounding argument R.  The unmasked variant uses a mask of -1.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))

/* Same builtin as above with the caller's mask U.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))

/* Dispatches to the _maskz builtin.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6686
/* Macro forms of the 128-bit single-precision fixupimm with explicit
   rounding argument R.  The unmasked variant uses a mask of -1.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))

/* Same builtin as above with the caller's mask U.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))

/* Dispatches to the _maskz builtin.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6701#endif
6702
6703extern __inline __m512
6704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705_mm512_movehdup_ps (__m512 __A)
6706{
6707 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6708 (__v16sf)
0fc245cd 6709 _mm512_undefined_ps (),
e2098065 6710 (__mmask16) -1);
6711}
6712
6713extern __inline __m512
6714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6716{
6717 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6718 (__v16sf) __W,
6719 (__mmask16) __U);
6720}
6721
6722extern __inline __m512
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6725{
6726 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6727 (__v16sf)
6728 _mm512_setzero_ps (),
6729 (__mmask16) __U);
6730}
6731
6732extern __inline __m512
6733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734_mm512_moveldup_ps (__m512 __A)
6735{
6736 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6737 (__v16sf)
0fc245cd 6738 _mm512_undefined_ps (),
e2098065 6739 (__mmask16) -1);
6740}
6741
6742extern __inline __m512
6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6745{
6746 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6747 (__v16sf) __W,
6748 (__mmask16) __U);
6749}
6750
6751extern __inline __m512
6752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6754{
6755 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6756 (__v16sf)
6757 _mm512_setzero_ps (),
6758 (__mmask16) __U);
6759}
6760
6761extern __inline __m512i
6762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763_mm512_or_si512 (__m512i __A, __m512i __B)
6764{
d521a5b2 6765 return (__m512i) ((__v16su) __A | (__v16su) __B);
e2098065 6766}
6767
6768extern __inline __m512i
6769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6770_mm512_or_epi32 (__m512i __A, __m512i __B)
6771{
d521a5b2 6772 return (__m512i) ((__v16su) __A | (__v16su) __B);
e2098065 6773}
6774
6775extern __inline __m512i
6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6778{
6779 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6780 (__v16si) __B,
6781 (__v16si) __W,
6782 (__mmask16) __U);
6783}
6784
6785extern __inline __m512i
6786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6788{
6789 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6790 (__v16si) __B,
6791 (__v16si)
6792 _mm512_setzero_si512 (),
6793 (__mmask16) __U);
6794}
6795
6796extern __inline __m512i
6797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798_mm512_or_epi64 (__m512i __A, __m512i __B)
6799{
d521a5b2 6800 return (__m512i) ((__v8du) __A | (__v8du) __B);
e2098065 6801}
6802
6803extern __inline __m512i
6804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6806{
6807 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6808 (__v8di) __B,
6809 (__v8di) __W,
6810 (__mmask8) __U);
6811}
6812
6813extern __inline __m512i
6814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6816{
6817 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6818 (__v8di) __B,
6819 (__v8di)
6820 _mm512_setzero_si512 (),
6821 (__mmask8) __U);
6822}
6823
6824extern __inline __m512i
6825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826_mm512_xor_si512 (__m512i __A, __m512i __B)
6827{
d521a5b2 6828 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
e2098065 6829}
6830
6831extern __inline __m512i
6832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6833_mm512_xor_epi32 (__m512i __A, __m512i __B)
6834{
d521a5b2 6835 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
e2098065 6836}
6837
6838extern __inline __m512i
6839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6841{
6842 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6843 (__v16si) __B,
6844 (__v16si) __W,
6845 (__mmask16) __U);
6846}
6847
6848extern __inline __m512i
6849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6850_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6851{
6852 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6853 (__v16si) __B,
6854 (__v16si)
6855 _mm512_setzero_si512 (),
6856 (__mmask16) __U);
6857}
6858
6859extern __inline __m512i
6860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861_mm512_xor_epi64 (__m512i __A, __m512i __B)
6862{
d521a5b2 6863 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
e2098065 6864}
6865
6866extern __inline __m512i
6867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6869{
6870 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6871 (__v8di) __B,
6872 (__v8di) __W,
6873 (__mmask8) __U);
6874}
6875
6876extern __inline __m512i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6879{
6880 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6881 (__v8di) __B,
6882 (__v8di)
6883 _mm512_setzero_si512 (),
6884 (__mmask8) __U);
6885}
6886
6887#ifdef __OPTIMIZE__
6888extern __inline __m512i
6889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890_mm512_rol_epi32 (__m512i __A, const int __B)
6891{
6892 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6893 (__v16si)
0fc245cd 6894 _mm512_undefined_si512 (),
e2098065 6895 (__mmask16) -1);
6896}
6897
6898extern __inline __m512i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6901{
6902 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6903 (__v16si) __W,
6904 (__mmask16) __U);
6905}
6906
6907extern __inline __m512i
6908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6910{
6911 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6912 (__v16si)
6913 _mm512_setzero_si512 (),
6914 (__mmask16) __U);
6915}
6916
6917extern __inline __m512i
6918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919_mm512_ror_epi32 (__m512i __A, int __B)
6920{
6921 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6922 (__v16si)
0fc245cd 6923 _mm512_undefined_si512 (),
e2098065 6924 (__mmask16) -1);
6925}
6926
6927extern __inline __m512i
6928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6929_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6930{
6931 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6932 (__v16si) __W,
6933 (__mmask16) __U);
6934}
6935
6936extern __inline __m512i
6937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6939{
6940 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6941 (__v16si)
6942 _mm512_setzero_si512 (),
6943 (__mmask16) __U);
6944}
6945
6946extern __inline __m512i
6947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6948_mm512_rol_epi64 (__m512i __A, const int __B)
6949{
6950 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6951 (__v8di)
0fc245cd 6952 _mm512_undefined_si512 (),
e2098065 6953 (__mmask8) -1);
6954}
6955
6956extern __inline __m512i
6957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6958_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6959{
6960 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6961 (__v8di) __W,
6962 (__mmask8) __U);
6963}
6964
6965extern __inline __m512i
6966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6968{
6969 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6970 (__v8di)
6971 _mm512_setzero_si512 (),
6972 (__mmask8) __U);
6973}
6974
6975extern __inline __m512i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm512_ror_epi64 (__m512i __A, int __B)
6978{
6979 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6980 (__v8di)
0fc245cd 6981 _mm512_undefined_si512 (),
e2098065 6982 (__mmask8) -1);
6983}
6984
6985extern __inline __m512i
6986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6987_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6988{
6989 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6990 (__v8di) __W,
6991 (__mmask8) __U);
6992}
6993
6994extern __inline __m512i
6995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6997{
6998 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6999 (__v8di)
7000 _mm512_setzero_si512 (),
7001 (__mmask8) __U);
7002}
7003
7004#else
/* Macro forms of the 32-bit rotate via __builtin_ia32_prold512_mask:
   unmasked (undefined vector, mask -1), masked (W, U) and zero-fill.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)_mm512_undefined_si512 (), \
     (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro forms of the 32-bit rotate via __builtin_ia32_prord512_mask:
   unmasked (undefined vector, mask -1), masked (W, U) and zero-fill.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)_mm512_undefined_si512 (), \
     (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), (int)(B), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro forms of the 64-bit rotate via __builtin_ia32_prolq512_mask:
   unmasked (undefined vector, mask -1), masked (W, U) and zero-fill.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)_mm512_undefined_si512 (), \
     (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7050
/* Macro forms of the 64-bit rotate via __builtin_ia32_prorq512_mask:
   unmasked (undefined vector, mask -1), masked (W, U) and zero-fill.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)_mm512_undefined_si512 (), \
     (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), (int)(B), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7066#endif
7067
7068extern __inline __m512i
7069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070_mm512_and_si512 (__m512i __A, __m512i __B)
7071{
d521a5b2 7072 return (__m512i) ((__v16su) __A & (__v16su) __B);
e2098065 7073}
7074
7075extern __inline __m512i
7076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7077_mm512_and_epi32 (__m512i __A, __m512i __B)
7078{
d521a5b2 7079 return (__m512i) ((__v16su) __A & (__v16su) __B);
e2098065 7080}
7081
7082extern __inline __m512i
7083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7084_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7085{
7086 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7087 (__v16si) __B,
7088 (__v16si) __W,
7089 (__mmask16) __U);
7090}
7091
7092extern __inline __m512i
7093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7094_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7095{
7096 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7097 (__v16si) __B,
7098 (__v16si)
7099 _mm512_setzero_si512 (),
7100 (__mmask16) __U);
7101}
7102
7103extern __inline __m512i
7104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105_mm512_and_epi64 (__m512i __A, __m512i __B)
7106{
d521a5b2 7107 return (__m512i) ((__v8du) __A & (__v8du) __B);
e2098065 7108}
7109
7110extern __inline __m512i
7111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7112_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7113{
7114 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7115 (__v8di) __B,
7116 (__v8di) __W, __U);
7117}
7118
7119extern __inline __m512i
7120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7121_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7122{
7123 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7124 (__v8di) __B,
7125 (__v8di)
7126 _mm512_setzero_pd (),
7127 __U);
7128}
7129
7130extern __inline __m512i
7131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132_mm512_andnot_si512 (__m512i __A, __m512i __B)
7133{
7134 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7135 (__v16si) __B,
7136 (__v16si)
0fc245cd 7137 _mm512_undefined_si512 (),
e2098065 7138 (__mmask16) -1);
7139}
7140
7141extern __inline __m512i
7142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7143_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7144{
7145 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7146 (__v16si) __B,
7147 (__v16si)
0fc245cd 7148 _mm512_undefined_si512 (),
e2098065 7149 (__mmask16) -1);
7150}
7151
7152extern __inline __m512i
7153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7155{
7156 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7157 (__v16si) __B,
7158 (__v16si) __W,
7159 (__mmask16) __U);
7160}
7161
7162extern __inline __m512i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7165{
7166 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7167 (__v16si) __B,
7168 (__v16si)
7169 _mm512_setzero_si512 (),
7170 (__mmask16) __U);
7171}
7172
7173extern __inline __m512i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7176{
7177 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7178 (__v8di) __B,
7179 (__v8di)
0fc245cd 7180 _mm512_undefined_si512 (),
e2098065 7181 (__mmask8) -1);
7182}
7183
7184extern __inline __m512i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7187{
7188 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7189 (__v8di) __B,
7190 (__v8di) __W, __U);
7191}
7192
7193extern __inline __m512i
7194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7195_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7196{
7197 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7198 (__v8di) __B,
7199 (__v8di)
7200 _mm512_setzero_pd (),
7201 __U);
7202}
7203
7204extern __inline __mmask16
7205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7206_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7207{
7208 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7209 (__v16si) __B,
7210 (__mmask16) -1);
7211}
7212
7213extern __inline __mmask16
7214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7216{
7217 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7218 (__v16si) __B, __U);
7219}
7220
7221extern __inline __mmask8
7222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7223_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7224{
7225 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7226 (__v8di) __B,
7227 (__mmask8) -1);
7228}
7229
7230extern __inline __mmask8
7231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7232_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7233{
7234 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7235}
7236
f46a34a6 7237extern __inline __mmask16
7238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7240{
7241 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7242 (__v16si) __B,
7243 (__mmask16) -1);
7244}
7245
7246extern __inline __mmask16
7247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7248_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7249{
7250 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7251 (__v16si) __B, __U);
7252}
7253
7254extern __inline __mmask8
7255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7257{
7258 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7259 (__v8di) __B,
7260 (__mmask8) -1);
7261}
7262
7263extern __inline __mmask8
7264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7266{
7267 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7268 (__v8di) __B, __U);
7269}
7270
e2098065 7271extern __inline __m512i
7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7274{
7275 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7276 (__v16si) __B,
7277 (__v16si)
0fc245cd 7278 _mm512_undefined_si512 (),
e2098065 7279 (__mmask16) -1);
7280}
7281
7282extern __inline __m512i
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7285 __m512i __B)
7286{
7287 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7288 (__v16si) __B,
7289 (__v16si) __W,
7290 (__mmask16) __U);
7291}
7292
7293extern __inline __m512i
7294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7295_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7296{
7297 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7298 (__v16si) __B,
7299 (__v16si)
7300 _mm512_setzero_si512 (),
7301 (__mmask16) __U);
7302}
7303
7304extern __inline __m512i
7305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7306_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7307{
7308 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7309 (__v8di) __B,
7310 (__v8di)
0fc245cd 7311 _mm512_undefined_si512 (),
e2098065 7312 (__mmask8) -1);
7313}
7314
7315extern __inline __m512i
7316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7318{
7319 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7320 (__v8di) __B,
7321 (__v8di) __W,
7322 (__mmask8) __U);
7323}
7324
7325extern __inline __m512i
7326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7328{
7329 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7330 (__v8di) __B,
7331 (__v8di)
7332 _mm512_setzero_si512 (),
7333 (__mmask8) __U);
7334}
7335
7336extern __inline __m512i
7337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7339{
7340 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7341 (__v16si) __B,
7342 (__v16si)
0fc245cd 7343 _mm512_undefined_si512 (),
e2098065 7344 (__mmask16) -1);
7345}
7346
7347extern __inline __m512i
7348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7349_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7350 __m512i __B)
7351{
7352 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7353 (__v16si) __B,
7354 (__v16si) __W,
7355 (__mmask16) __U);
7356}
7357
7358extern __inline __m512i
7359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7361{
7362 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7363 (__v16si) __B,
7364 (__v16si)
7365 _mm512_setzero_si512 (),
7366 (__mmask16) __U);
7367}
7368
7369extern __inline __m512i
7370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7371_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7372{
7373 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7374 (__v8di) __B,
7375 (__v8di)
0fc245cd 7376 _mm512_undefined_si512 (),
e2098065 7377 (__mmask8) -1);
7378}
7379
7380extern __inline __m512i
7381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7382_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7383{
7384 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7385 (__v8di) __B,
7386 (__v8di) __W,
7387 (__mmask8) __U);
7388}
7389
7390extern __inline __m512i
7391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7393{
7394 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7395 (__v8di) __B,
7396 (__v8di)
7397 _mm512_setzero_si512 (),
7398 (__mmask8) __U);
7399}
7400
7401#ifdef __x86_64__
7402#ifdef __OPTIMIZE__
7403extern __inline unsigned long long
7404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7406{
7407 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7408}
7409
7410extern __inline long long
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7413{
7414 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7415}
7416
7417extern __inline long long
7418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7420{
7421 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7422}
7423
7424extern __inline unsigned long long
7425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7426_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7427{
7428 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7429}
7430
7431extern __inline long long
7432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7434{
7435 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7436}
7437
7438extern __inline long long
7439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7440_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7441{
7442 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7443}
7444#else
/* Macro fallbacks for the 64-bit scalar single-precision conversions.
   Each argument is parenthesized and cast ((__v4sf)(__m128) for the
   vector, (int) for the rounding argument) so that these forms treat
   their operands the same way as the inline-function versions and the
   other macro fallbacks in this header.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64 ((__v4sf)(__m128)(A), \
                                                    (int)(B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long)__builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long)__builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64 ((__v4sf)(__m128)(A), \
                                                     (int)(B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long)__builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long)__builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))
7462#endif
7463#endif
7464
7465#ifdef __OPTIMIZE__
7466extern __inline unsigned
7467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7468_mm_cvt_roundss_u32 (__m128 __A, const int __R)
7469{
7470 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7471}
7472
7473extern __inline int
7474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475_mm_cvt_roundss_si32 (__m128 __A, const int __R)
7476{
7477 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7478}
7479
7480extern __inline int
7481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482_mm_cvt_roundss_i32 (__m128 __A, const int __R)
7483{
7484 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7485}
7486
7487extern __inline unsigned
7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7490{
7491 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7492}
7493
7494extern __inline int
7495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7496_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7497{
7498 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7499}
7500
7501extern __inline int
7502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7503_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7504{
7505 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7506}
7507#else
/* Macro fallbacks for the 32-bit scalar single-precision conversions.
   Each argument is parenthesized and cast ((__v4sf)(__m128) for the
   vector, (int) for the rounding argument) so that these forms treat
   their operands the same way as the inline-function versions and the
   other macro fallbacks in this header.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvtss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int)__builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int)__builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvttss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int)__builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int)__builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))
7525#endif
7526
7527#ifdef __x86_64__
7528#ifdef __OPTIMIZE__
7529extern __inline unsigned long long
7530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7531_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7532{
7533 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7534}
7535
7536extern __inline long long
7537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7538_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7539{
7540 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7541}
7542
7543extern __inline long long
7544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7545_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7546{
7547 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7548}
7549
7550extern __inline unsigned long long
7551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7552_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7553{
7554 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7555}
7556
7557extern __inline long long
7558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7559_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7560{
7561 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7562}
7563
7564extern __inline long long
7565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7566_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7567{
7568 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7569}
7570#else
/* Macro fallbacks for the 64-bit scalar double-precision conversions.
   Each argument is parenthesized and cast ((__v2df)(__m128d) for the
   vector, (int) for the rounding argument) so that these forms treat
   their operands the same way as the inline-function versions and the
   other macro fallbacks in this header.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64 ((__v2df)(__m128d)(A), \
                                                    (int)(B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long)__builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long)__builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64 ((__v2df)(__m128d)(A), \
                                                     (int)(B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long)__builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long)__builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))
7588#endif
7589#endif
7590
7591#ifdef __OPTIMIZE__
7592extern __inline unsigned
7593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7594_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7595{
7596 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7597}
7598
7599extern __inline int
7600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7601_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7602{
7603 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7604}
7605
7606extern __inline int
7607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7608_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7609{
7610 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7611}
7612
7613extern __inline unsigned
7614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7616{
7617 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7618}
7619
7620extern __inline int
7621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7622_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7623{
7624 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7625}
7626
7627extern __inline int
7628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7630{
7631 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7632}
7633#else
/* Macro forms of the 32-bit scalar double conversions, used when
   __OPTIMIZE__ is not defined.  Each one forwards both arguments
   unchanged to the builtin and casts the result to the integer type
   named below.  */
#define _mm_cvt_roundsd_u32(X, R) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((X), (R)))

#define _mm_cvt_roundsd_si32(X, R) \
  ((int) __builtin_ia32_vcvtsd2si32 ((X), (R)))

#define _mm_cvt_roundsd_i32(X, R) \
  ((int) __builtin_ia32_vcvtsd2si32 ((X), (R)))

#define _mm_cvtt_roundsd_u32(X, R) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((X), (R)))

#define _mm_cvtt_roundsd_si32(X, R) \
  ((int) __builtin_ia32_vcvttsd2si32 ((X), (R)))

#define _mm_cvtt_roundsd_i32(X, R) \
  ((int) __builtin_ia32_vcvttsd2si32 ((X), (R)))
7651#endif
7652
7653extern __inline __m512d
7654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655_mm512_movedup_pd (__m512d __A)
7656{
7657 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7658 (__v8df)
0fc245cd 7659 _mm512_undefined_pd (),
e2098065 7660 (__mmask8) -1);
7661}
7662
7663extern __inline __m512d
7664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7666{
7667 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7668 (__v8df) __W,
7669 (__mmask8) __U);
7670}
7671
7672extern __inline __m512d
7673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7675{
7676 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7677 (__v8df)
7678 _mm512_setzero_pd (),
7679 (__mmask8) __U);
7680}
7681
7682extern __inline __m512d
7683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684_mm512_unpacklo_pd (__m512d __A, __m512d __B)
7685{
7686 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7687 (__v8df) __B,
7688 (__v8df)
0fc245cd 7689 _mm512_undefined_pd (),
e2098065 7690 (__mmask8) -1);
7691}
7692
7693extern __inline __m512d
7694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7695_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7696{
7697 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7698 (__v8df) __B,
7699 (__v8df) __W,
7700 (__mmask8) __U);
7701}
7702
7703extern __inline __m512d
7704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7705_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7706{
7707 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7708 (__v8df) __B,
7709 (__v8df)
7710 _mm512_setzero_pd (),
7711 (__mmask8) __U);
7712}
7713
7714extern __inline __m512d
7715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7716_mm512_unpackhi_pd (__m512d __A, __m512d __B)
7717{
7718 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7719 (__v8df) __B,
7720 (__v8df)
0fc245cd 7721 _mm512_undefined_pd (),
e2098065 7722 (__mmask8) -1);
7723}
7724
7725extern __inline __m512d
7726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7728{
7729 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7730 (__v8df) __B,
7731 (__v8df) __W,
7732 (__mmask8) __U);
7733}
7734
7735extern __inline __m512d
7736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7737_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7738{
7739 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7740 (__v8df) __B,
7741 (__v8df)
7742 _mm512_setzero_pd (),
7743 (__mmask8) __U);
7744}
7745
7746extern __inline __m512
7747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7748_mm512_unpackhi_ps (__m512 __A, __m512 __B)
7749{
7750 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7751 (__v16sf) __B,
7752 (__v16sf)
0fc245cd 7753 _mm512_undefined_ps (),
e2098065 7754 (__mmask16) -1);
7755}
7756
7757extern __inline __m512
7758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7760{
7761 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7762 (__v16sf) __B,
7763 (__v16sf) __W,
7764 (__mmask16) __U);
7765}
7766
7767extern __inline __m512
7768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7770{
7771 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7772 (__v16sf) __B,
7773 (__v16sf)
7774 _mm512_setzero_ps (),
7775 (__mmask16) __U);
7776}
7777
7778#ifdef __OPTIMIZE__
7779extern __inline __m512d
7780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781_mm512_cvt_roundps_pd (__m256 __A, const int __R)
7782{
7783 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7784 (__v8df)
0fc245cd 7785 _mm512_undefined_pd (),
e2098065 7786 (__mmask8) -1, __R);
7787}
7788
7789extern __inline __m512d
7790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7792 const int __R)
7793{
7794 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7795 (__v8df) __W,
7796 (__mmask8) __U, __R);
7797}
7798
7799extern __inline __m512d
7800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7802{
7803 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7804 (__v8df)
7805 _mm512_setzero_pd (),
7806 (__mmask8) __U, __R);
7807}
7808
7809extern __inline __m512
7810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7811_mm512_cvt_roundph_ps (__m256i __A, const int __R)
7812{
7813 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7814 (__v16sf)
0fc245cd 7815 _mm512_undefined_ps (),
e2098065 7816 (__mmask16) -1, __R);
7817}
7818
7819extern __inline __m512
7820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7821_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7822 const int __R)
7823{
7824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7825 (__v16sf) __W,
7826 (__mmask16) __U, __R);
7827}
7828
7829extern __inline __m512
7830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7831_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7832{
7833 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7834 (__v16sf)
7835 _mm512_setzero_ps (),
7836 (__mmask16) __U, __R);
7837}
7838
7839extern __inline __m256i
7840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841_mm512_cvt_roundps_ph (__m512 __A, const int __I)
7842{
7843 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7844 __I,
7845 (__v16hi)
0fc245cd 7846 _mm256_undefined_si256 (),
e2098065 7847 -1);
7848}
7849
7850extern __inline __m256i
7851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852_mm512_cvtps_ph (__m512 __A, const int __I)
7853{
7854 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7855 __I,
7856 (__v16hi)
0fc245cd 7857 _mm256_undefined_si256 (),
e2098065 7858 -1);
7859}
7860
7861extern __inline __m256i
7862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7863_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7864 const int __I)
7865{
7866 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7867 __I,
7868 (__v16hi) __U,
7869 (__mmask16) __W);
7870}
7871
7872extern __inline __m256i
7873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7874_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7875{
7876 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7877 __I,
7878 (__v16hi) __U,
7879 (__mmask16) __W);
7880}
7881
7882extern __inline __m256i
7883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7885{
7886 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7887 __I,
7888 (__v16hi)
7889 _mm256_setzero_si256 (),
7890 (__mmask16) __W);
7891}
7892
7893extern __inline __m256i
7894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7895_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7896{
7897 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7898 __I,
7899 (__v16hi)
7900 _mm256_setzero_si256 (),
7901 (__mmask16) __W);
7902}
7903#else
/* Macro forms of the ps->pd and ph->ps round conversions, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so that the leading cast applies to the builtin's
   result even when the macro call is followed by a postfix operator;
   without them the cast would bind after the postfix expression.
   Arguments are parenthesized for the same reason.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_setzero_ps (), (U), (B)))
7921
/* Macro forms of the ps->ph conversions, used when __OPTIMIZE__ is not
   defined.  The source argument A is parenthesized before the
   (__v16sf)(__m512) casts are applied; with a bare A the casts would
   bind only to the first operand of a compound argument expression.
   In the mask forms U is the __m256i vector operand and W is the
   __mmask16 mask, matching the inline functions.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi)(__m256i) (U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi)(__m256i) (U), (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7940#endif
7941
7942#ifdef __OPTIMIZE__
7943extern __inline __m256
7944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7946{
7947 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7948 (__v8sf)
0fc245cd 7949 _mm256_undefined_ps (),
e2098065 7950 (__mmask8) -1, __R);
7951}
7952
7953extern __inline __m256
7954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7956 const int __R)
7957{
7958 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7959 (__v8sf) __W,
7960 (__mmask8) __U, __R);
7961}
7962
7963extern __inline __m256
7964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7966{
7967 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7968 (__v8sf)
7969 _mm256_setzero_ps (),
7970 (__mmask8) __U, __R);
7971}
7972
0b7cc9c6 7973extern __inline __m128
7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7976{
7977 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7978 (__v2df) __B,
7979 __R);
7980}
7981
7982extern __inline __m128d
7983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7984_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7985{
7986 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7987 (__v4sf) __B,
7988 __R);
7989}
e2098065 7990#else
/* Macro forms of the pd->ps, sd->ss and ss->sd round conversions, used
   when __OPTIMIZE__ is not defined.  Each expansion is wrapped in an
   outer pair of parentheses so that the leading cast applies to the
   builtin's result even when the macro call is followed by a postfix
   operator.  Arguments are parenthesized as well.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
e2098065 8005#endif
8006
8007extern __inline void
8008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8009_mm512_stream_si512 (__m512i * __P, __m512i __A)
8010{
8011 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8012}
8013
8014extern __inline void
8015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8016_mm512_stream_ps (float *__P, __m512 __A)
8017{
8018 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8019}
8020
8021extern __inline void
8022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8023_mm512_stream_pd (double *__P, __m512d __A)
8024{
8025 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8026}
8027
fbf4df62 8028extern __inline __m512i
8029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8030_mm512_stream_load_si512 (void *__P)
8031{
8032 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8033}
8034
/* Constants for mantissa extraction.  The getmant wrappers in this file
   combine one of these with a _MM_MANTISSA_SIGN_ENUM value as
   (sign << 2) | norm, so the numeric values matter; they are spelled out
   explicitly here.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8043
/* Sign-control selectors for mantissa extraction.  The getmant wrappers
   in this file shift this value left by two before OR-ing it with a
   _MM_MANTISSA_NORM_ENUM value; the numeric values are spelled out
   explicitly here.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8050
e2098065 8051#ifdef __OPTIMIZE__
0b7cc9c6 8052extern __inline __m128
8053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8055{
8056 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8057 (__v4sf) __B,
8058 __R);
8059}
8060
8061extern __inline __m128d
8062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8064{
8065 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8066 (__v2df) __B,
8067 __R);
8068}
8069
e2098065 8070extern __inline __m512
8071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8072_mm512_getexp_round_ps (__m512 __A, const int __R)
8073{
8074 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8075 (__v16sf)
0fc245cd 8076 _mm512_undefined_ps (),
e2098065 8077 (__mmask16) -1, __R);
8078}
8079
8080extern __inline __m512
8081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8083 const int __R)
8084{
8085 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8086 (__v16sf) __W,
8087 (__mmask16) __U, __R);
8088}
8089
8090extern __inline __m512
8091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8093{
8094 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8095 (__v16sf)
8096 _mm512_setzero_ps (),
8097 (__mmask16) __U, __R);
8098}
8099
8100extern __inline __m512d
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm512_getexp_round_pd (__m512d __A, const int __R)
8103{
8104 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8105 (__v8df)
0fc245cd 8106 _mm512_undefined_pd (),
e2098065 8107 (__mmask8) -1, __R);
8108}
8109
8110extern __inline __m512d
8111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8112_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8113 const int __R)
8114{
8115 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8116 (__v8df) __W,
8117 (__mmask8) __U, __R);
8118}
8119
8120extern __inline __m512d
8121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8122_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8123{
8124 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8125 (__v8df)
8126 _mm512_setzero_pd (),
8127 (__mmask8) __U, __R);
8128}
8129
e2098065 8130extern __inline __m512d
8131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8132_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8133 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8134{
8135 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8136 (__C << 2) | __B,
0fc245cd 8137 _mm512_undefined_pd (),
e2098065 8138 (__mmask8) -1, __R);
8139}
8140
8141extern __inline __m512d
8142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8144 _MM_MANTISSA_NORM_ENUM __B,
8145 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8146{
8147 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8148 (__C << 2) | __B,
8149 (__v8df) __W, __U,
8150 __R);
8151}
8152
8153extern __inline __m512d
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8156 _MM_MANTISSA_NORM_ENUM __B,
8157 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8158{
8159 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8160 (__C << 2) | __B,
8161 (__v8df)
8162 _mm512_setzero_pd (),
8163 __U, __R);
8164}
8165
8166extern __inline __m512
8167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8168_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8169 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8170{
8171 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8172 (__C << 2) | __B,
0fc245cd 8173 _mm512_undefined_ps (),
e2098065 8174 (__mmask16) -1, __R);
8175}
8176
8177extern __inline __m512
8178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8180 _MM_MANTISSA_NORM_ENUM __B,
8181 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8182{
8183 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8184 (__C << 2) | __B,
8185 (__v16sf) __W, __U,
8186 __R);
8187}
8188
8189extern __inline __m512
8190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8192 _MM_MANTISSA_NORM_ENUM __B,
8193 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8194{
8195 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8196 (__C << 2) | __B,
8197 (__v16sf)
8198 _mm512_setzero_ps (),
8199 __U, __R);
8200}
8201
0b7cc9c6 8202extern __inline __m128d
8203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8204_mm_getmant_round_sd (__m128d __A, __m128d __B,
8205 _MM_MANTISSA_NORM_ENUM __C,
8206 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8207{
8208 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8209 (__v2df) __B,
8210 (__D << 2) | __C,
8211 __R);
8212}
8213
8214extern __inline __m128
8215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8216_mm_getmant_round_ss (__m128 __A, __m128 __B,
8217 _MM_MANTISSA_NORM_ENUM __C,
8218 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8219{
8220 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8221 (__v4sf) __B,
8222 (__D << 2) | __C,
8223 __R);
8224}
8225
e2098065 8226#else
/* Macro forms of the getmant round intrinsics, used when __OPTIMIZE__ is
   not defined.  Each packs its two selector arguments into one int as
   (SIGN << 2) | NORM and forwards the rounding argument RND.  */
#define _mm512_getmant_round_pd(SRC, NORM, SIGN, RND) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v8df)(__m512d) _mm512_undefined_pd (), \
      (__mmask8) -1, \
      (RND)))

#define _mm512_mask_getmant_round_pd(PASS, MASK, SRC, NORM, SIGN, RND) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v8df)(__m512d) (PASS), \
      (__mmask8) (MASK), \
      (RND)))

#define _mm512_maskz_getmant_round_pd(MASK, SRC, NORM, SIGN, RND) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v8df)(__m512d) _mm512_setzero_pd (), \
      (__mmask8) (MASK), \
      (RND)))

#define _mm512_getmant_round_ps(SRC, NORM, SIGN, RND) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v16sf)(__m512) _mm512_undefined_ps (), \
      (__mmask16) -1, \
      (RND)))

#define _mm512_mask_getmant_round_ps(PASS, MASK, SRC, NORM, SIGN, RND) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v16sf)(__m512) (PASS), \
      (__mmask16) (MASK), \
      (RND)))

#define _mm512_maskz_getmant_round_ps(MASK, SRC, NORM, SIGN, RND) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512) (SRC), \
      (int) (((SIGN) << 2) | (NORM)), \
      (__v16sf)(__m512) _mm512_setzero_ps (), \
      (__mmask16) (MASK), \
      (RND)))

#define _mm_getmant_round_sd(FIRST, SECOND, NORM, SIGN, RND) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
      (__v2df)(__m128d) (FIRST), \
      (__v2df)(__m128d) (SECOND), \
      (int) (((SIGN) << 2) | (NORM)), \
      (RND)))

#define _mm_getmant_round_ss(FIRST, SECOND, NORM, SIGN, RND) \
  ((__m128) __builtin_ia32_getmantss_round ( \
      (__v4sf)(__m128) (FIRST), \
      (__v4sf)(__m128) (SECOND), \
      (int) (((SIGN) << 2) | (NORM)), \
      (RND)))
8278
/* Macro forms of the getexp round intrinsics, used when __OPTIMIZE__ is
   not defined.  The unmasked forms pass an undefined vector and an
   all-ones mask; the maskz forms pass a zero vector.  */
#define _mm_getexp_round_ss(FIRST, SECOND, RND) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128) (FIRST), \
      (__v4sf)(__m128) (SECOND), (RND)))

#define _mm_getexp_round_sd(FIRST, SECOND, RND) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d) (FIRST), \
      (__v2df)(__m128d) (SECOND), (RND)))

#define _mm512_getexp_round_ps(SRC, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (SRC), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (RND)))

#define _mm512_mask_getexp_round_ps(PASS, MASK, SRC, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (SRC), \
      (__v16sf)(__m512) (PASS), (__mmask16) (MASK), (RND)))

#define _mm512_maskz_getexp_round_ps(MASK, SRC, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (SRC), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (MASK), (RND)))

#define _mm512_getexp_round_pd(SRC, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (SRC), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (RND)))

#define _mm512_mask_getexp_round_pd(PASS, MASK, SRC, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (SRC), \
      (__v8df)(__m512d) (PASS), (__mmask8) (MASK), (RND)))

#define _mm512_maskz_getexp_round_pd(MASK, SRC, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (SRC), \
      (__v8df) _mm512_setzero_pd (), (__mmask8) (MASK), (RND)))
8308#endif
8309
8310#ifdef __OPTIMIZE__
8311extern __inline __m512
8312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8314{
8315 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0fc245cd 8316 (__v16sf)
8317 _mm512_undefined_ps (),
8318 -1, __R);
e2098065 8319}
8320
8321extern __inline __m512
8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8324 const int __imm, const int __R)
8325{
8326 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8327 (__v16sf) __A,
8328 (__mmask16) __B, __R);
8329}
8330
8331extern __inline __m512
8332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8333_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8334 const int __imm, const int __R)
8335{
8336 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8337 __imm,
8338 (__v16sf)
8339 _mm512_setzero_ps (),
8340 (__mmask16) __A, __R);
8341}
8342
8343extern __inline __m512d
8344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8346{
8347 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0fc245cd 8348 (__v8df)
8349 _mm512_undefined_pd (),
8350 -1, __R);
e2098065 8351}
8352
8353extern __inline __m512d
8354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8356 __m512d __C, const int __imm, const int __R)
8357{
8358 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8359 (__v8df) __A,
8360 (__mmask8) __B, __R);
8361}
8362
8363extern __inline __m512d
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8366 const int __imm, const int __R)
8367{
8368 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8369 __imm,
8370 (__v8df)
8371 _mm512_setzero_pd (),
8372 (__mmask8) __A, __R);
8373}
0b7cc9c6 8374
8375extern __inline __m128
8376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8377_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8378{
8379 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8380 (__v4sf) __B, __imm, __R);
8381}
8382
8383extern __inline __m128d
8384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8385_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8386 const int __R)
8387{
8388 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8389 (__v2df) __B, __imm, __R);
8390}
8391
e2098065 8392#else
/* Macro forms of the roundscale round intrinsics, used when __OPTIMIZE__
   is not defined.  In the mask forms PASS is the vector operand that
   accompanies the mask MASK; SRC is the input vector, IMM the immediate
   and RND the rounding argument.  */
#define _mm512_roundscale_round_ps(SRC, IMM, RND) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (SRC), \
      (int) (IMM), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) (-1), (RND)))

#define _mm512_mask_roundscale_round_ps(PASS, MASK, SRC, IMM, RND) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (SRC), \
      (int) (IMM), (__v16sf)(__m512) (PASS), \
      (__mmask16) (MASK), (RND)))

#define _mm512_maskz_roundscale_round_ps(MASK, SRC, IMM, RND) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (SRC), \
      (int) (IMM), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (MASK), (RND)))

#define _mm512_roundscale_round_pd(SRC, IMM, RND) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (SRC), \
      (int) (IMM), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) (-1), (RND)))

#define _mm512_mask_roundscale_round_pd(PASS, MASK, SRC, IMM, RND) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (SRC), \
      (int) (IMM), (__v8df)(__m512d) (PASS), \
      (__mmask8) (MASK), (RND)))

#define _mm512_maskz_roundscale_round_pd(MASK, SRC, IMM, RND) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (SRC), \
      (int) (IMM), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (MASK), (RND)))

#define _mm_roundscale_round_ss(FIRST, SECOND, IMM, RND) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128) (FIRST), \
      (__v4sf)(__m128) (SECOND), (int) (IMM), (RND)))

#define _mm_roundscale_round_sd(FIRST, SECOND, IMM, RND) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d) (FIRST), \
      (__v2df)(__m128d) (SECOND), (int) (IMM), (RND)))
e2098065 8425#endif
8426
8427extern __inline __m512
8428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429_mm512_floor_ps (__m512 __A)
8430{
8431 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8432 _MM_FROUND_FLOOR,
8433 (__v16sf) __A, -1,
8434 _MM_FROUND_CUR_DIRECTION);
8435}
8436
8437extern __inline __m512d
8438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439_mm512_floor_pd (__m512d __A)
8440{
8441 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8442 _MM_FROUND_FLOOR,
8443 (__v8df) __A, -1,
8444 _MM_FROUND_CUR_DIRECTION);
8445}
8446
8447extern __inline __m512
8448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449_mm512_ceil_ps (__m512 __A)
8450{
8451 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8452 _MM_FROUND_CEIL,
8453 (__v16sf) __A, -1,
8454 _MM_FROUND_CUR_DIRECTION);
8455}
8456
8457extern __inline __m512d
8458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8459_mm512_ceil_pd (__m512d __A)
8460{
8461 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8462 _MM_FROUND_CEIL,
8463 (__v8df) __A, -1,
8464 _MM_FROUND_CUR_DIRECTION);
8465}
8466
8467extern __inline __m512
8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8470{
8471 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8472 _MM_FROUND_FLOOR,
8473 (__v16sf) __W, __U,
8474 _MM_FROUND_CUR_DIRECTION);
8475}
8476
8477extern __inline __m512d
8478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8480{
8481 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8482 _MM_FROUND_FLOOR,
8483 (__v8df) __W, __U,
8484 _MM_FROUND_CUR_DIRECTION);
8485}
8486
8487extern __inline __m512
8488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8489_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8490{
8491 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8492 _MM_FROUND_CEIL,
8493 (__v16sf) __W, __U,
8494 _MM_FROUND_CUR_DIRECTION);
8495}
8496
8497extern __inline __m512d
8498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8500{
8501 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8502 _MM_FROUND_CEIL,
8503 (__v8df) __W, __U,
8504 _MM_FROUND_CUR_DIRECTION);
8505}
8506
e2098065 8507#ifdef __OPTIMIZE__
e2098065 8508extern __inline __m512i
8509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8511{
8512 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8513 (__v16si) __B, __imm,
8514 (__v16si)
0fc245cd 8515 _mm512_undefined_si512 (),
e2098065 8516 (__mmask16) -1);
8517}
8518
8519extern __inline __m512i
8520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8521_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8522 __m512i __B, const int __imm)
8523{
8524 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8525 (__v16si) __B, __imm,
8526 (__v16si) __W,
8527 (__mmask16) __U);
8528}
8529
8530extern __inline __m512i
8531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8532_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8533 const int __imm)
8534{
8535 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8536 (__v16si) __B, __imm,
8537 (__v16si)
8538 _mm512_setzero_si512 (),
8539 (__mmask16) __U);
8540}
8541
8542extern __inline __m512i
8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8545{
8546 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8547 (__v8di) __B, __imm,
8548 (__v8di)
0fc245cd 8549 _mm512_undefined_si512 (),
e2098065 8550 (__mmask8) -1);
8551}
8552
8553extern __inline __m512i
8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8556 __m512i __B, const int __imm)
8557{
8558 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8559 (__v8di) __B, __imm,
8560 (__v8di) __W,
8561 (__mmask8) __U);
8562}
8563
8564extern __inline __m512i
8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8567 const int __imm)
8568{
8569 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8570 (__v8di) __B, __imm,
8571 (__v8di)
8572 _mm512_setzero_si512 (),
8573 (__mmask8) __U);
8574}
8575#else
/* Macro forms of the alignr intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to the same builtin call as the inline function
   of the same name, with C cast to int and passed as the immediate.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v16si)_mm512_undefined_si512 (), \
                                            (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v16si)_mm512_setzero_si512 (), \
                                            (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v8di)_mm512_undefined_si512 (), \
                                            (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), \
                                            (int)(C), \
                                            (__v8di)_mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
8604#endif
8605
8606extern __inline __mmask16
8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8609{
8610 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8611 (__v16si) __B,
8612 (__mmask16) -1);
8613}
8614
8615extern __inline __mmask16
8616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8618{
8619 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8620 (__v16si) __B, __U);
8621}
8622
8623extern __inline __mmask8
8624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8625_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8626{
8627 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8628 (__v8di) __B, __U);
8629}
8630
8631extern __inline __mmask8
8632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8634{
8635 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8636 (__v8di) __B,
8637 (__mmask8) -1);
8638}
8639
8640extern __inline __mmask16
8641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8643{
8644 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8645 (__v16si) __B,
8646 (__mmask16) -1);
8647}
8648
8649extern __inline __mmask16
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8652{
8653 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8654 (__v16si) __B, __U);
8655}
8656
8657extern __inline __mmask8
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8660{
8661 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8662 (__v8di) __B, __U);
8663}
8664
8665extern __inline __mmask8
8666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8667_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8668{
8669 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8670 (__v8di) __B,
8671 (__mmask8) -1);
8672}
8673
f4a19f2a 8674extern __inline __mmask16
8675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8677{
8678 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8679 (__v16si) __Y, 5,
8680 (__mmask16) -1);
8681}
8682
889d21f6 8683extern __inline __mmask16
8684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8686{
8687 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8688 (__v16si) __Y, 5,
8689 (__mmask16) __M);
8690}
8691
8692extern __inline __mmask16
8693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8695{
8696 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8697 (__v16si) __Y, 5,
8698 (__mmask16) __M);
8699}
8700
f4a19f2a 8701extern __inline __mmask16
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8704{
8705 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8706 (__v16si) __Y, 5,
8707 (__mmask16) -1);
8708}
8709
889d21f6 8710extern __inline __mmask8
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8713{
8714 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8715 (__v8di) __Y, 5,
8716 (__mmask8) __M);
8717}
8718
f4a19f2a 8719extern __inline __mmask8
8720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8722{
8723 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8724 (__v8di) __Y, 5,
8725 (__mmask8) -1);
8726}
8727
889d21f6 8728extern __inline __mmask8
8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8731{
8732 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8733 (__v8di) __Y, 5,
8734 (__mmask8) __M);
8735}
8736
f4a19f2a 8737extern __inline __mmask8
8738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8740{
8741 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8742 (__v8di) __Y, 5,
8743 (__mmask8) -1);
8744}
8745
889d21f6 8746extern __inline __mmask16
8747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8749{
8750 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8751 (__v16si) __Y, 2,
8752 (__mmask16) __M);
8753}
8754
f4a19f2a 8755extern __inline __mmask16
8756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8758{
8759 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8760 (__v16si) __Y, 2,
8761 (__mmask16) -1);
8762}
8763
889d21f6 8764extern __inline __mmask16
8765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8767{
8768 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8769 (__v16si) __Y, 2,
8770 (__mmask16) __M);
8771}
8772
f4a19f2a 8773extern __inline __mmask16
8774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8776{
8777 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8778 (__v16si) __Y, 2,
8779 (__mmask16) -1);
8780}
8781
889d21f6 8782extern __inline __mmask8
8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8785{
8786 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8787 (__v8di) __Y, 2,
8788 (__mmask8) __M);
8789}
8790
f4a19f2a 8791extern __inline __mmask8
8792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8794{
8795 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8796 (__v8di) __Y, 2,
8797 (__mmask8) -1);
8798}
8799
889d21f6 8800extern __inline __mmask8
8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8803{
8804 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8805 (__v8di) __Y, 2,
8806 (__mmask8) __M);
8807}
8808
f4a19f2a 8809extern __inline __mmask8
8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8812{
8813 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8814 (__v8di) __Y, 2,
8815 (__mmask8) -1);
8816}
8817
889d21f6 8818extern __inline __mmask16
8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8821{
8822 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8823 (__v16si) __Y, 1,
8824 (__mmask16) __M);
8825}
8826
f4a19f2a 8827extern __inline __mmask16
8828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8830{
8831 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8832 (__v16si) __Y, 1,
8833 (__mmask16) -1);
8834}
8835
889d21f6 8836extern __inline __mmask16
8837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8839{
8840 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8841 (__v16si) __Y, 1,
8842 (__mmask16) __M);
8843}
8844
f4a19f2a 8845extern __inline __mmask16
8846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8848{
8849 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8850 (__v16si) __Y, 1,
8851 (__mmask16) -1);
8852}
8853
889d21f6 8854extern __inline __mmask8
8855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8857{
8858 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8859 (__v8di) __Y, 1,
8860 (__mmask8) __M);
8861}
8862
f4a19f2a 8863extern __inline __mmask8
8864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8866{
8867 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8868 (__v8di) __Y, 1,
8869 (__mmask8) -1);
8870}
8871
889d21f6 8872extern __inline __mmask8
8873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8875{
8876 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8877 (__v8di) __Y, 1,
8878 (__mmask8) __M);
8879}
8880
f4a19f2a 8881extern __inline __mmask8
8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8884{
8885 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8886 (__v8di) __Y, 1,
8887 (__mmask8) -1);
8888}
8889
8890extern __inline __mmask16
8891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8893{
8894 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8895 (__v16si) __Y, 4,
8896 (__mmask16) -1);
8897}
8898
889d21f6 8899extern __inline __mmask16
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8902{
8903 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8904 (__v16si) __Y, 4,
8905 (__mmask16) __M);
8906}
8907
8908extern __inline __mmask16
8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8911{
8912 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8913 (__v16si) __Y, 4,
8914 (__mmask16) __M);
8915}
8916
f4a19f2a 8917extern __inline __mmask16
8918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8920{
8921 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8922 (__v16si) __Y, 4,
8923 (__mmask16) -1);
8924}
8925
889d21f6 8926extern __inline __mmask8
8927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8929{
8930 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8931 (__v8di) __Y, 4,
8932 (__mmask8) __M);
8933}
8934
f4a19f2a 8935extern __inline __mmask8
8936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8938{
8939 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8940 (__v8di) __Y, 4,
8941 (__mmask8) -1);
8942}
8943
889d21f6 8944extern __inline __mmask8
8945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8947{
8948 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8949 (__v8di) __Y, 4,
8950 (__mmask8) __M);
8951}
8952
f4a19f2a 8953extern __inline __mmask8
8954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8956{
8957 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8958 (__v8di) __Y, 4,
8959 (__mmask8) -1);
8960}
8961
/* Predicate immediates accepted by the integer compare intrinsics.
   Two pairs are aliases: NLT/GE share 0x5 and NLE/GT share 0x6.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5	/* Same value as _MM_CMPINT_NLT.  */
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6	/* Same value as _MM_CMPINT_NLE.  */
8971
8972#ifdef __OPTIMIZE__
8973extern __inline __mmask8
8974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8975_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8976{
8977 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8978 (__v8di) __Y, __P,
8979 (__mmask8) -1);
8980}
8981
8982extern __inline __mmask16
8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8985{
8986 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8987 (__v16si) __Y, __P,
8988 (__mmask16) -1);
8989}
8990
8991extern __inline __mmask8
8992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8994{
8995 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8996 (__v8di) __Y, __P,
8997 (__mmask8) -1);
8998}
8999
9000extern __inline __mmask16
9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9003{
9004 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9005 (__v16si) __Y, __P,
9006 (__mmask16) -1);
9007}
9008
9009extern __inline __mmask8
9010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9012 const int __R)
9013{
9014 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9015 (__v8df) __Y, __P,
9016 (__mmask8) -1, __R);
9017}
9018
9019extern __inline __mmask16
9020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9021_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9022{
9023 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9024 (__v16sf) __Y, __P,
9025 (__mmask16) -1, __R);
9026}
9027
9028extern __inline __mmask8
9029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9031 const int __P)
9032{
9033 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9034 (__v8di) __Y, __P,
9035 (__mmask8) __U);
9036}
9037
9038extern __inline __mmask16
9039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9040_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9041 const int __P)
9042{
9043 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9044 (__v16si) __Y, __P,
9045 (__mmask16) __U);
9046}
9047
9048extern __inline __mmask8
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9051 const int __P)
9052{
9053 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9054 (__v8di) __Y, __P,
9055 (__mmask8) __U);
9056}
9057
9058extern __inline __mmask16
9059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9061 const int __P)
9062{
9063 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9064 (__v16si) __Y, __P,
9065 (__mmask16) __U);
9066}
9067
9068extern __inline __mmask8
9069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9071 const int __P, const int __R)
9072{
9073 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9074 (__v8df) __Y, __P,
9075 (__mmask8) __U, __R);
9076}
9077
9078extern __inline __mmask16
9079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9081 const int __P, const int __R)
9082{
9083 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9084 (__v16sf) __Y, __P,
9085 (__mmask16) __U, __R);
9086}
9087
9088extern __inline __mmask8
9089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9091{
9092 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9093 (__v2df) __Y, __P,
9094 (__mmask8) -1, __R);
9095}
9096
9097extern __inline __mmask8
9098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9100 const int __P, const int __R)
9101{
9102 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9103 (__v2df) __Y, __P,
9104 (__mmask8) __M, __R);
9105}
9106
9107extern __inline __mmask8
9108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9109_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9110{
9111 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9112 (__v4sf) __Y, __P,
9113 (__mmask8) -1, __R);
9114}
9115
9116extern __inline __mmask8
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9119 const int __P, const int __R)
9120{
9121 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9122 (__v4sf) __Y, __P,
9123 (__mmask8) __M, __R);
9124}
9125
9126#else
/* Macro forms of the unmasked generic compares, used when __OPTIMIZE__ is
   not defined.  Each expands to the same builtin call as the inline function
   of the same name.

   The result of the 32-bit element compares is cast to __mmask16, matching
   the return type of the inline forms; casting it to __mmask8 would drop
   the upper 8 bits of the result.  Every macro argument is parenthesized.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
                                           (__v8di)(__m512i)(Y), (int)(P), \
                                           (__mmask8)-1))

#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)-1))

#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)-1))

#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)-1, (R)))

#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)-1, (R)))
9156
/* Macro forms of the masked generic compares, used when __OPTIMIZE__ is not
   defined.  Each expands to the same builtin call as the inline function of
   the same name.

   The result of the 32-bit element compares is cast to __mmask16, matching
   the return type of the inline forms; casting it to __mmask8 would drop
   the upper 8 bits of the result.  M is parenthesized before it is cast so
   that the cast applies to the whole argument expression (for example
   "m >> 8") rather than to its first operand only.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
                                           (__v8di)(__m512i)(Y), (int)(P), \
                                           (__mmask8)(M)))

#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))

#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))

#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), (R)))

#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), (R)))
9186
/* Macro forms of the cmpsd/cmpss compares, used when __OPTIMIZE__ is not
   defined.  The unmasked forms pass an all-ones __mmask8; the masked forms
   pass M through without a cast.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (__mmask8)-1, (R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (M), (R)))

#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (__mmask8)-1, (R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (M), (R)))
9206#endif
9207
9208#ifdef __OPTIMIZE__
9209extern __inline __m512
9210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9212{
0fc245cd 9213 __m512 v1_old = _mm512_undefined_ps ();
e2098065 9214 __mmask16 mask = 0xFFFF;
9215
9216 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9217 __addr,
9218 (__v16si) __index,
9219 mask, __scale);
9220}
9221
9222extern __inline __m512
9223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9225 __m512i __index, float const *__addr, int __scale)
9226{
9227 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9228 __addr,
9229 (__v16si) __index,
9230 __mask, __scale);
9231}
9232
9233extern __inline __m512d
9234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9236{
0fc245cd 9237 __m512d v1_old = _mm512_undefined_pd ();
e2098065 9238 __mmask8 mask = 0xFF;
9239
9240 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9241 __addr,
9242 (__v8si) __index, mask,
9243 __scale);
9244}
9245
9246extern __inline __m512d
9247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9249 __m256i __index, double const *__addr, int __scale)
9250{
9251 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9252 __addr,
9253 (__v8si) __index,
9254 __mask, __scale);
9255}
9256
9257extern __inline __m256
9258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9260{
0fc245cd 9261 __m256 v1_old = _mm256_undefined_ps ();
e2098065 9262 __mmask8 mask = 0xFF;
9263
9264 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9265 __addr,
9266 (__v8di) __index, mask,
9267 __scale);
9268}
9269
9270extern __inline __m256
9271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9272_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9273 __m512i __index, float const *__addr, int __scale)
9274{
9275 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9276 __addr,
9277 (__v8di) __index,
9278 __mask, __scale);
9279}
9280
9281extern __inline __m512d
9282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9284{
0fc245cd 9285 __m512d v1_old = _mm512_undefined_pd ();
e2098065 9286 __mmask8 mask = 0xFF;
9287
9288 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9289 __addr,
9290 (__v8di) __index, mask,
9291 __scale);
9292}
9293
9294extern __inline __m512d
9295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9297 __m512i __index, double const *__addr, int __scale)
9298{
9299 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9300 __addr,
9301 (__v8di) __index,
9302 __mask, __scale);
9303}
9304
9305extern __inline __m512i
9306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9308{
0fc245cd 9309 __m512i v1_old = _mm512_undefined_si512 ();
e2098065 9310 __mmask16 mask = 0xFFFF;
9311
9312 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9313 __addr,
9314 (__v16si) __index,
9315 mask, __scale);
9316}
9317
9318extern __inline __m512i
9319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9321 __m512i __index, int const *__addr, int __scale)
9322{
9323 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9324 __addr,
9325 (__v16si) __index,
9326 __mask, __scale);
9327}
9328
9329extern __inline __m512i
9330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9332{
0fc245cd 9333 __m512i v1_old = _mm512_undefined_si512 ();
e2098065 9334 __mmask8 mask = 0xFF;
9335
9336 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9337 __addr,
9338 (__v8si) __index, mask,
9339 __scale);
9340}
9341
9342extern __inline __m512i
9343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9344_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9345 __m256i __index, long long const *__addr,
9346 int __scale)
9347{
9348 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9349 __addr,
9350 (__v8si) __index,
9351 __mask, __scale);
9352}
9353
9354extern __inline __m256i
9355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9357{
0fc245cd 9358 __m256i v1_old = _mm256_undefined_si256 ();
e2098065 9359 __mmask8 mask = 0xFF;
9360
9361 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9362 __addr,
9363 (__v8di) __index,
9364 mask, __scale);
9365}
9366
9367extern __inline __m256i
9368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9370 __m512i __index, int const *__addr, int __scale)
9371{
9372 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9373 __addr,
9374 (__v8di) __index,
9375 __mask, __scale);
9376}
9377
9378extern __inline __m512i
9379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9380_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9381{
0fc245cd 9382 __m512i v1_old = _mm512_undefined_si512 ();
e2098065 9383 __mmask8 mask = 0xFF;
9384
9385 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9386 __addr,
9387 (__v8di) __index, mask,
9388 __scale);
9389}
9390
9391extern __inline __m512i
9392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9394 __m512i __index, long long const *__addr,
9395 int __scale)
9396{
9397 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9398 __addr,
9399 (__v8di) __index,
9400 __mask, __scale);
9401}
9402
9403extern __inline void
9404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9406{
9407 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9408 (__v16si) __index, (__v16sf) __v1, __scale);
9409}
9410
9411extern __inline void
9412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9414 __m512i __index, __m512 __v1, int __scale)
9415{
9416 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9417 (__v16sf) __v1, __scale);
9418}
9419
9420extern __inline void
9421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9422_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9423 int __scale)
9424{
9425 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9426 (__v8si) __index, (__v8df) __v1, __scale);
9427}
9428
9429extern __inline void
9430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9432 __m256i __index, __m512d __v1, int __scale)
9433{
9434 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9435 (__v8df) __v1, __scale);
9436}
9437
9438extern __inline void
9439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9441{
9442 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9443 (__v8di) __index, (__v8sf) __v1, __scale);
9444}
9445
9446extern __inline void
9447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9449 __m512i __index, __m256 __v1, int __scale)
9450{
9451 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9452 (__v8sf) __v1, __scale);
9453}
9454
9455extern __inline void
9456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9458 int __scale)
9459{
9460 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9461 (__v8di) __index, (__v8df) __v1, __scale);
9462}
9463
9464extern __inline void
9465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9467 __m512i __index, __m512d __v1, int __scale)
9468{
9469 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9470 (__v8df) __v1, __scale);
9471}
9472
9473extern __inline void
9474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9476 __m512i __v1, int __scale)
9477{
9478 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9479 (__v16si) __index, (__v16si) __v1, __scale);
9480}
9481
9482extern __inline void
9483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9485 __m512i __index, __m512i __v1, int __scale)
9486{
9487 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9488 (__v16si) __v1, __scale);
9489}
9490
9491extern __inline void
9492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9494 __m512i __v1, int __scale)
9495{
9496 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9497 (__v8si) __index, (__v8di) __v1, __scale);
9498}
9499
9500extern __inline void
9501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9503 __m256i __index, __m512i __v1, int __scale)
9504{
9505 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9506 (__v8di) __v1, __scale);
9507}
9508
9509extern __inline void
9510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9512 __m256i __v1, int __scale)
9513{
9514 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9515 (__v8di) __index, (__v8si) __v1, __scale);
9516}
9517
9518extern __inline void
9519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9521 __m512i __index, __m256i __v1, int __scale)
9522{
9523 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9524 (__v8si) __v1, __scale);
9525}
9526
9527extern __inline void
9528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9530 __m512i __v1, int __scale)
9531{
9532 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9533 (__v8di) __index, (__v8di) __v1, __scale);
9534}
9535
9536extern __inline void
9537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9539 __m512i __index, __m512i __v1, int __scale)
9540{
9541 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9542 (__v8di) __v1, __scale);
9543}
9544#else
/* Macro form of _mm512_i32gather_ps.  Each argument and the whole
   expansion are parenthesized so expression arguments (e.g. ADDR given
   as `p + n') are cast as a unit.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                            \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (float const *) (ADDR),          \
                                          (__v16si)(__m512i) (INDEX),      \
                                          (__mmask16) 0xFFFF, (int) (SCALE)))
9550
/* Macro form of _mm512_mask_i32gather_ps.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)          \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),       \
                                          (float const *) (ADDR),          \
                                          (__v16si)(__m512i) (INDEX),      \
                                          (__mmask16) (MASK), (int) (SCALE)))
9556
/* Macro form of _mm512_i32gather_pd.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                            \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (double const *) (ADDR),         \
                                          (__v8si)(__m256i) (INDEX),       \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9562
/* Macro form of _mm512_mask_i32gather_pd.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)          \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),       \
                                          (double const *) (ADDR),         \
                                          (__v8si)(__m256i) (INDEX),       \
                                          (__mmask8) (MASK), (int) (SCALE)))
9568
/* Macro form of _mm512_i64gather_ps (256-bit result).  Each argument
   and the whole expansion are parenthesized so expression arguments
   are cast as a unit.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                            \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                          (float const *) (ADDR),          \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9574
/* Macro form of _mm512_mask_i64gather_ps (256-bit result).  Each
   argument and the whole expansion are parenthesized so expression
   arguments are cast as a unit.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)          \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),        \
                                          (float const *) (ADDR),          \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) (MASK), (int) (SCALE)))
9580
/* Macro form of _mm512_i64gather_pd.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                            \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (double const *) (ADDR),         \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9586
/* Macro form of _mm512_mask_i64gather_pd.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)          \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),       \
                                          (double const *) (ADDR),         \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) (MASK), (int) (SCALE)))
9592
/* Macro form of _mm512_i32gather_epi32.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                         \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)                       \
                                           _mm512_undefined_si512 (),      \
                                           (int const *) (ADDR),           \
                                           (__v16si)(__m512i) (INDEX),     \
                                           (__mmask16) 0xFFFF,             \
                                           (int) (SCALE)))
9598
/* Macro form of _mm512_mask_i32gather_epi32.  Each argument and the
   whole expansion are parenthesized so expression arguments are cast
   as a unit.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),     \
                                           (int const *) (ADDR),           \
                                           (__v16si)(__m512i) (INDEX),     \
                                           (__mmask16) (MASK),             \
                                           (int) (SCALE)))
9604
/* Macro form of _mm512_i32gather_epi64.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                         \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)                         \
                                          _mm512_undefined_si512 (),       \
                                          (long long const *) (ADDR),      \
                                          (__v8si)(__m256i) (INDEX),       \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9610
/* Macro form of _mm512_mask_i32gather_epi64.  Each argument and the
   whole expansion are parenthesized so expression arguments are cast
   as a unit.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),       \
                                          (long long const *) (ADDR),      \
                                          (__v8si)(__m256i) (INDEX),       \
                                          (__mmask8) (MASK), (int) (SCALE)))
9616
/* Macro form of _mm512_i64gather_epi32 (256-bit result).  Each argument
   and the whole expansion are parenthesized so expression arguments
   are cast as a unit.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                         \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)                        \
                                           _mm256_undefined_si256 (),      \
                                           (int const *) (ADDR),           \
                                           (__v8di)(__m512i) (INDEX),      \
                                           (__mmask8) 0xFF, (int) (SCALE)))
9622
/* Macro form of _mm512_mask_i64gather_epi32 (256-bit result).  Each
   argument and the whole expansion are parenthesized so expression
   arguments are cast as a unit.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),      \
                                           (int const *) (ADDR),           \
                                           (__v8di)(__m512i) (INDEX),      \
                                           (__mmask8) (MASK),              \
                                           (int) (SCALE)))
9628
/* Macro form of _mm512_i64gather_epi64.  Each argument and the whole
   expansion are parenthesized so expression arguments are cast as a
   unit.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                         \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)                         \
                                          _mm512_undefined_si512 (),       \
                                          (long long const *) (ADDR),      \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) 0xFF, (int) (SCALE)))
9634
/* Macro form of _mm512_mask_i64gather_epi64.  Each argument and the
   whole expansion are parenthesized so expression arguments are cast
   as a unit.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),       \
                                          (long long const *) (ADDR),      \
                                          (__v8di)(__m512i) (INDEX),       \
                                          (__mmask8) (MASK), (int) (SCALE)))
9640
/* Macro form of _mm512_i32scatter_ps.  Arguments are parenthesized so
   expression arguments are cast as a unit.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                       \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) 0xFFFF,     \
                                 (__v16si)(__m512i) (INDEX),               \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9645
/* Macro form of _mm512_mask_i32scatter_ps.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)            \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK),     \
                                 (__v16si)(__m512i) (INDEX),               \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9650
/* Macro form of _mm512_i32scatter_pd.  Arguments are parenthesized so
   expression arguments are cast as a unit.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                       \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) 0xFF,        \
                                (__v8si)(__m256i) (INDEX),                 \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9655
/* Macro form of _mm512_mask_i32scatter_pd.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)            \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK),      \
                                (__v8si)(__m256i) (INDEX),                 \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9660
/* Macro form of _mm512_i64scatter_ps.  Arguments are parenthesized so
   expression arguments are cast as a unit.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                       \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) 0xFF,        \
                                 (__v8di)(__m512i) (INDEX),                \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9665
/* Macro form of _mm512_mask_i64scatter_ps.  MASK is cast to __mmask8,
   matching the inline definition and the unmasked macro (the previous
   __mmask16 cast was inconsistent with both).  Arguments are
   parenthesized so expression arguments are cast as a unit.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)            \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK),      \
                                 (__v8di)(__m512i) (INDEX),                \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9670
/* Macro form of _mm512_i64scatter_pd.  Arguments are parenthesized so
   expression arguments are cast as a unit.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                       \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) 0xFF,        \
                                (__v8di)(__m512i) (INDEX),                 \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9675
/* Macro form of _mm512_mask_i64scatter_pd.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)            \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK),      \
                                (__v8di)(__m512i) (INDEX),                 \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9680
/* Macro form of _mm512_i32scatter_epi32.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) 0xFFFF,       \
                                 (__v16si)(__m512i) (INDEX),               \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9685
/* Macro form of _mm512_mask_i32scatter_epi32.  Arguments are
   parenthesized so expression arguments are cast as a unit.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK),       \
                                 (__v16si)(__m512i) (INDEX),               \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9690
/* Macro form of _mm512_i32scatter_epi64.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) 0xFF,     \
                                (__v8si)(__m256i) (INDEX),                 \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9695
/* Macro form of _mm512_mask_i32scatter_epi64.  Arguments are
   parenthesized so expression arguments are cast as a unit.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) (MASK),   \
                                (__v8si)(__m256i) (INDEX),                 \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9700
/* Macro form of _mm512_i64scatter_epi32.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) 0xFF,          \
                                 (__v8di)(__m512i) (INDEX),                \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9705
/* Macro form of _mm512_mask_i64scatter_epi32.  Arguments are
   parenthesized so expression arguments are cast as a unit.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK),        \
                                 (__v8di)(__m512i) (INDEX),                \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9710
/* Macro form of _mm512_i64scatter_epi64.  Arguments are parenthesized
   so expression arguments are cast as a unit.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) 0xFF,     \
                                (__v8di)(__m512i) (INDEX),                 \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9715
/* Macro form of _mm512_mask_i64scatter_epi64.  Arguments are
   parenthesized so expression arguments are cast as a unit.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) (MASK),   \
                                (__v8di)(__m512i) (INDEX),                 \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9720#endif
9721
9722extern __inline __m512d
9723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9725{
9726 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9727 (__v8df) __W,
9728 (__mmask8) __U);
9729}
9730
9731extern __inline __m512d
9732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9734{
9735 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9736 (__v8df)
9737 _mm512_setzero_pd (),
9738 (__mmask8) __U);
9739}
9740
9741extern __inline void
9742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9744{
9745 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9746 (__mmask8) __U);
9747}
9748
9749extern __inline __m512
9750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9752{
9753 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9754 (__v16sf) __W,
9755 (__mmask16) __U);
9756}
9757
9758extern __inline __m512
9759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9761{
9762 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9763 (__v16sf)
9764 _mm512_setzero_ps (),
9765 (__mmask16) __U);
9766}
9767
9768extern __inline void
9769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9771{
9772 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9773 (__mmask16) __U);
9774}
9775
9776extern __inline __m512i
9777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9779{
9780 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9781 (__v8di) __W,
9782 (__mmask8) __U);
9783}
9784
9785extern __inline __m512i
9786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9788{
9789 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9790 (__v8di)
9791 _mm512_setzero_si512 (),
9792 (__mmask8) __U);
9793}
9794
9795extern __inline void
9796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9798{
9799 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9800 (__mmask8) __U);
9801}
9802
9803extern __inline __m512i
9804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9806{
9807 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9808 (__v16si) __W,
9809 (__mmask16) __U);
9810}
9811
9812extern __inline __m512i
9813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9815{
9816 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9817 (__v16si)
9818 _mm512_setzero_si512 (),
9819 (__mmask16) __U);
9820}
9821
9822extern __inline void
9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9825{
9826 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9827 (__mmask16) __U);
9828}
9829
9830extern __inline __m512d
9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9833{
9834 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9835 (__v8df) __W,
9836 (__mmask8) __U);
9837}
9838
9839extern __inline __m512d
9840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9841_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9842{
9843 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9844 (__v8df)
9845 _mm512_setzero_pd (),
9846 (__mmask8) __U);
9847}
9848
9849extern __inline __m512d
9850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9852{
9853 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9854 (__v8df) __W,
9855 (__mmask8) __U);
9856}
9857
9858extern __inline __m512d
9859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9861{
9862 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9863 (__v8df)
9864 _mm512_setzero_pd (),
9865 (__mmask8) __U);
9866}
9867
9868extern __inline __m512
9869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9871{
9872 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9873 (__v16sf) __W,
9874 (__mmask16) __U);
9875}
9876
9877extern __inline __m512
9878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9880{
9881 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9882 (__v16sf)
9883 _mm512_setzero_ps (),
9884 (__mmask16) __U);
9885}
9886
9887extern __inline __m512
9888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9890{
9891 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9892 (__v16sf) __W,
9893 (__mmask16) __U);
9894}
9895
9896extern __inline __m512
9897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9899{
9900 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9901 (__v16sf)
9902 _mm512_setzero_ps (),
9903 (__mmask16) __U);
9904}
9905
9906extern __inline __m512i
9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9909{
9910 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9911 (__v8di) __W,
9912 (__mmask8) __U);
9913}
9914
9915extern __inline __m512i
9916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9918{
9919 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9920 (__v8di)
9921 _mm512_setzero_si512 (),
9922 (__mmask8) __U);
9923}
9924
9925extern __inline __m512i
9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9928{
9929 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9930 (__v8di) __W,
9931 (__mmask8) __U);
9932}
9933
9934extern __inline __m512i
9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9937{
9938 return (__m512i)
9939 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9940 (__v8di)
9941 _mm512_setzero_si512 (),
9942 (__mmask8) __U);
9943}
9944
9945extern __inline __m512i
9946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9948{
9949 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9950 (__v16si) __W,
9951 (__mmask16) __U);
9952}
9953
9954extern __inline __m512i
9955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9957{
9958 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9959 (__v16si)
9960 _mm512_setzero_si512 (),
9961 (__mmask16) __U);
9962}
9963
9964extern __inline __m512i
9965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9966_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9967{
9968 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9969 (__v16si) __W,
9970 (__mmask16) __U);
9971}
9972
9973extern __inline __m512i
9974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9976{
9977 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9978 (__v16si)
9979 _mm512_setzero_si512
9980 (), (__mmask16) __U);
9981}
9982
9983/* Mask arithmetic operations */
9984extern __inline __mmask16
9985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986_mm512_kand (__mmask16 __A, __mmask16 __B)
9987{
9988 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9989}
9990
9991extern __inline __mmask16
9992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993_mm512_kandn (__mmask16 __A, __mmask16 __B)
9994{
9995 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9996}
9997
9998extern __inline __mmask16
9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000_mm512_kor (__mmask16 __A, __mmask16 __B)
10001{
10002 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10003}
10004
10005extern __inline int
10006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10008{
10009 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10010 (__mmask16) __B);
10011}
10012
10013extern __inline int
10014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10016{
10017 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10018 (__mmask16) __B);
10019}
10020
10021extern __inline __mmask16
10022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10024{
10025 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10026}
10027
10028extern __inline __mmask16
10029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10030_mm512_kxor (__mmask16 __A, __mmask16 __B)
10031{
10032 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10033}
10034
10035extern __inline __mmask16
10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037_mm512_knot (__mmask16 __A)
10038{
10039 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10040}
10041
10042extern __inline __mmask16
10043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10045{
10046 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10047}
10048
10049#ifdef __OPTIMIZE__
10050extern __inline __m512i
10051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10052_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10053 const int __imm)
10054{
10055 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10056 (__v4si) __D,
10057 __imm,
10058 (__v16si)
10059 _mm512_setzero_si512 (),
10060 __B);
10061}
10062
10063extern __inline __m512
10064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10065_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10066 const int __imm)
10067{
10068 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10069 (__v4sf) __D,
10070 __imm,
10071 (__v16sf)
10072 _mm512_setzero_ps (), __B);
10073}
10074
10075extern __inline __m512i
10076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10077_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10078 __m128i __D, const int __imm)
10079{
10080 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10081 (__v4si) __D,
10082 __imm,
10083 (__v16si) __A,
10084 __B);
10085}
10086
10087extern __inline __m512
10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10090 __m128 __D, const int __imm)
10091{
10092 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10093 (__v4sf) __D,
10094 __imm,
10095 (__v16sf) __A, __B);
10096}
10097#else
/* Macro form of _mm512_maskz_insertf32x4.  The mask A is cast to
   __mmask16, matching the inline definition; the previous __mmask8
   cast discarded the upper eight mask bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))
10102
/* Macro form of _mm512_maskz_inserti32x4.  The mask A is cast to
   __mmask16, matching the inline definition; the previous __mmask8
   cast discarded the upper eight mask bits.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10107
/* Macro form of _mm512_mask_insertf32x4.  The mask B is cast to
   __mmask16, matching the inline definition; the previous __mmask8
   cast discarded the upper eight mask bits.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))
10112
/* Macro form of _mm512_mask_inserti32x4.  The mask B is cast to
   __mmask16, matching the inline definition; the previous __mmask8
   cast discarded the upper eight mask bits.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
10117#endif
10118
10119extern __inline __m512i
10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121_mm512_max_epi64 (__m512i __A, __m512i __B)
10122{
10123 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10124 (__v8di) __B,
10125 (__v8di)
0fc245cd 10126 _mm512_undefined_si512 (),
e2098065 10127 (__mmask8) -1);
10128}
10129
10130extern __inline __m512i
10131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10133{
10134 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10135 (__v8di) __B,
10136 (__v8di)
10137 _mm512_setzero_si512 (),
10138 __M);
10139}
10140
10141extern __inline __m512i
10142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10144{
10145 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10146 (__v8di) __B,
10147 (__v8di) __W, __M);
10148}
10149
10150extern __inline __m512i
10151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152_mm512_min_epi64 (__m512i __A, __m512i __B)
10153{
10154 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10155 (__v8di) __B,
10156 (__v8di)
0fc245cd 10157 _mm512_undefined_si512 (),
e2098065 10158 (__mmask8) -1);
10159}
10160
10161extern __inline __m512i
10162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10163_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10164{
10165 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10166 (__v8di) __B,
10167 (__v8di) __W, __M);
10168}
10169
10170extern __inline __m512i
10171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10172_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10173{
10174 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10175 (__v8di) __B,
10176 (__v8di)
10177 _mm512_setzero_si512 (),
10178 __M);
10179}
10180
10181extern __inline __m512i
10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183_mm512_max_epu64 (__m512i __A, __m512i __B)
10184{
10185 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10186 (__v8di) __B,
10187 (__v8di)
0fc245cd 10188 _mm512_undefined_si512 (),
e2098065 10189 (__mmask8) -1);
10190}
10191
10192extern __inline __m512i
10193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10195{
10196 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10197 (__v8di) __B,
10198 (__v8di)
10199 _mm512_setzero_si512 (),
10200 __M);
10201}
10202
10203extern __inline __m512i
10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10206{
10207 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10208 (__v8di) __B,
10209 (__v8di) __W, __M);
10210}
10211
10212extern __inline __m512i
10213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214_mm512_min_epu64 (__m512i __A, __m512i __B)
10215{
10216 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10217 (__v8di) __B,
10218 (__v8di)
0fc245cd 10219 _mm512_undefined_si512 (),
e2098065 10220 (__mmask8) -1);
10221}
10222
10223extern __inline __m512i
10224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10225_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10226{
10227 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10228 (__v8di) __B,
10229 (__v8di) __W, __M);
10230}
10231
10232extern __inline __m512i
10233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10234_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10235{
10236 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10237 (__v8di) __B,
10238 (__v8di)
10239 _mm512_setzero_si512 (),
10240 __M);
10241}
10242
10243extern __inline __m512i
10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245_mm512_max_epi32 (__m512i __A, __m512i __B)
10246{
10247 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10248 (__v16si) __B,
10249 (__v16si)
0fc245cd 10250 _mm512_undefined_si512 (),
e2098065 10251 (__mmask16) -1);
10252}
10253
10254extern __inline __m512i
10255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10256_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10257{
10258 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10259 (__v16si) __B,
10260 (__v16si)
10261 _mm512_setzero_si512 (),
10262 __M);
10263}
10264
10265extern __inline __m512i
10266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10267_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10268{
10269 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10270 (__v16si) __B,
10271 (__v16si) __W, __M);
10272}
10273
10274extern __inline __m512i
10275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10276_mm512_min_epi32 (__m512i __A, __m512i __B)
10277{
10278 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10279 (__v16si) __B,
10280 (__v16si)
0fc245cd 10281 _mm512_undefined_si512 (),
e2098065 10282 (__mmask16) -1);
10283}
10284
10285extern __inline __m512i
10286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10287_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10288{
10289 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10290 (__v16si) __B,
10291 (__v16si)
10292 _mm512_setzero_si512 (),
10293 __M);
10294}
10295
10296extern __inline __m512i
10297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10299{
10300 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10301 (__v16si) __B,
10302 (__v16si) __W, __M);
10303}
10304
10305extern __inline __m512i
10306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10307_mm512_max_epu32 (__m512i __A, __m512i __B)
10308{
10309 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10310 (__v16si) __B,
10311 (__v16si)
0fc245cd 10312 _mm512_undefined_si512 (),
e2098065 10313 (__mmask16) -1);
10314}
10315
10316extern __inline __m512i
10317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10319{
10320 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10321 (__v16si) __B,
10322 (__v16si)
10323 _mm512_setzero_si512 (),
10324 __M);
10325}
10326
10327extern __inline __m512i
10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10330{
10331 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10332 (__v16si) __B,
10333 (__v16si) __W, __M);
10334}
10335
10336extern __inline __m512i
10337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338_mm512_min_epu32 (__m512i __A, __m512i __B)
10339{
10340 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10341 (__v16si) __B,
10342 (__v16si)
0fc245cd 10343 _mm512_undefined_si512 (),
e2098065 10344 (__mmask16) -1);
10345}
10346
10347extern __inline __m512i
10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10350{
10351 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10352 (__v16si) __B,
10353 (__v16si)
10354 _mm512_setzero_si512 (),
10355 __M);
10356}
10357
10358extern __inline __m512i
10359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10360_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10361{
10362 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10363 (__v16si) __B,
10364 (__v16si) __W, __M);
10365}
10366
10367extern __inline __m512
10368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10370{
10371 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10372 (__v16sf) __B,
10373 (__v16sf)
0fc245cd 10374 _mm512_undefined_ps (),
e2098065 10375 (__mmask16) -1);
10376}
10377
10378extern __inline __m512
10379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10381{
10382 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383 (__v16sf) __B,
10384 (__v16sf) __W,
10385 (__mmask16) __U);
10386}
10387
10388extern __inline __m512
10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10391{
10392 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10393 (__v16sf) __B,
10394 (__v16sf)
10395 _mm512_setzero_ps (),
10396 (__mmask16) __U);
10397}
10398
0b7cc9c6 10399#ifdef __OPTIMIZE__
10400extern __inline __m128d
10401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10402_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10403{
10404 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10405 (__v2df) __B,
10406 __R);
10407}
10408
10409extern __inline __m128
10410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10412{
10413 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10414 (__v4sf) __B,
10415 __R);
10416}
10417
10418extern __inline __m128d
10419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10421{
10422 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10423 (__v2df) __B,
10424 __R);
10425}
10426
10427extern __inline __m128
10428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10429_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10430{
10431 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10432 (__v4sf) __B,
10433 __R);
10434}
10435
10436#else
/* Macro forms of _mm_max_round_{sd,ss} and _mm_min_round_{sd,ss}, used
   when __OPTIMIZE__ is not defined.  Each one expands to the same
   max/min builtin that the inline function of the same name calls
   (previously they expanded to the add/sub builtins by mistake).  The
   expansion is wrapped in parentheses so the cast applies to the whole
   call even when the macro is followed by a postfix operator.
   A and B are the two vector operands; C is the rounding argument.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
10448#endif
10449
e2098065 10450extern __inline __m512d
10451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10453{
10454 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10455 (__v8df) __W,
10456 (__mmask8) __U);
10457}
10458
10459extern __inline __m512
10460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10462{
10463 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10464 (__v16sf) __W,
10465 (__mmask16) __U);
10466}
10467
10468extern __inline __m512i
10469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10471{
10472 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10473 (__v8di) __W,
10474 (__mmask8) __U);
10475}
10476
10477extern __inline __m512i
10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10480{
10481 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10482 (__v16si) __W,
10483 (__mmask16) __U);
10484}
10485
0b7cc9c6 10486#ifdef __OPTIMIZE__
10487extern __inline __m128d
10488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10490{
10491 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10492 (__v2df) __A,
10493 (__v2df) __B,
10494 __R);
10495}
10496
10497extern __inline __m128
10498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10500{
10501 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10502 (__v4sf) __A,
10503 (__v4sf) __B,
10504 __R);
10505}
10506
10507extern __inline __m128d
10508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10510{
10511 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10512 (__v2df) __A,
10513 -(__v2df) __B,
10514 __R);
10515}
10516
10517extern __inline __m128
10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10520{
10521 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10522 (__v4sf) __A,
10523 -(__v4sf) __B,
10524 __R);
10525}
10526
10527extern __inline __m128d
10528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10530{
10531 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10532 -(__v2df) __A,
10533 (__v2df) __B,
10534 __R);
10535}
10536
10537extern __inline __m128
10538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10540{
10541 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10542 -(__v4sf) __A,
10543 (__v4sf) __B,
10544 __R);
10545}
10546
10547extern __inline __m128d
10548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10550{
10551 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10552 -(__v2df) __A,
10553 -(__v2df) __B,
10554 __R);
10555}
10556
10557extern __inline __m128
10558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10559_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10560{
10561 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10562 -(__v4sf) __A,
10563 -(__v4sf) __B,
10564 __R);
10565}
10566#else
/* Macro forms of the scalar fused multiply-add family, used when
   __OPTIMIZE__ is not defined.  All of them expand to the vfmaddsd3 /
   vfmaddss3 builtin; the fmsub, fnmadd and fnmsub variants negate the
   third, the second, or both of those operands respectively.  Each
   expansion is wrapped in parentheses so the result cast applies to the
   whole call even when the macro is followed by a postfix operator.
   A, B and C are the three vector operands; R is the rounding
   argument.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10590#endif
10591
e2098065 10592#ifdef __OPTIMIZE__
10593extern __inline int
10594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10596{
10597 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10598}
10599
10600extern __inline int
10601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10603{
10604 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10605}
10606#else
/* Macro forms of _mm_comi_round_{ss,sd}, used when __OPTIMIZE__ is not
   defined: the four arguments go to the vcomiss / vcomisd builtin
   unchanged and in the same order.  */
#define _mm_comi_round_ss(X, Y, P, R) __builtin_ia32_vcomiss(X, Y, P, R)
#define _mm_comi_round_sd(X, Y, P, R) __builtin_ia32_vcomisd(X, Y, P, R)
10611#endif
10612
10613extern __inline __m512d
10614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10615_mm512_sqrt_pd (__m512d __A)
10616{
10617 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10618 (__v8df)
0fc245cd 10619 _mm512_undefined_pd (),
e2098065 10620 (__mmask8) -1,
10621 _MM_FROUND_CUR_DIRECTION);
10622}
10623
10624extern __inline __m512d
10625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10627{
10628 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629 (__v8df) __W,
10630 (__mmask8) __U,
10631 _MM_FROUND_CUR_DIRECTION);
10632}
10633
10634extern __inline __m512d
10635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10637{
10638 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10639 (__v8df)
10640 _mm512_setzero_pd (),
10641 (__mmask8) __U,
10642 _MM_FROUND_CUR_DIRECTION);
10643}
10644
10645extern __inline __m512
10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647_mm512_sqrt_ps (__m512 __A)
10648{
10649 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10650 (__v16sf)
0fc245cd 10651 _mm512_undefined_ps (),
e2098065 10652 (__mmask16) -1,
10653 _MM_FROUND_CUR_DIRECTION);
10654}
10655
10656extern __inline __m512
10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10659{
10660 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661 (__v16sf) __W,
10662 (__mmask16) __U,
10663 _MM_FROUND_CUR_DIRECTION);
10664}
10665
10666extern __inline __m512
10667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10669{
10670 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10671 (__v16sf)
10672 _mm512_setzero_ps (),
10673 (__mmask16) __U,
10674 _MM_FROUND_CUR_DIRECTION);
10675}
10676
10677extern __inline __m512d
10678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679_mm512_add_pd (__m512d __A, __m512d __B)
10680{
d521a5b2 10681 return (__m512d) ((__v8df)__A + (__v8df)__B);
e2098065 10682}
10683
10684extern __inline __m512d
10685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10687{
10688 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10689 (__v8df) __B,
10690 (__v8df) __W,
10691 (__mmask8) __U,
10692 _MM_FROUND_CUR_DIRECTION);
10693}
10694
10695extern __inline __m512d
10696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10698{
10699 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10700 (__v8df) __B,
10701 (__v8df)
10702 _mm512_setzero_pd (),
10703 (__mmask8) __U,
10704 _MM_FROUND_CUR_DIRECTION);
10705}
10706
10707extern __inline __m512
10708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10709_mm512_add_ps (__m512 __A, __m512 __B)
10710{
d521a5b2 10711 return (__m512) ((__v16sf)__A + (__v16sf)__B);
e2098065 10712}
10713
10714extern __inline __m512
10715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10717{
10718 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10719 (__v16sf) __B,
10720 (__v16sf) __W,
10721 (__mmask16) __U,
10722 _MM_FROUND_CUR_DIRECTION);
10723}
10724
10725extern __inline __m512
10726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10728{
10729 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730 (__v16sf) __B,
10731 (__v16sf)
10732 _mm512_setzero_ps (),
10733 (__mmask16) __U,
10734 _MM_FROUND_CUR_DIRECTION);
10735}
10736
10737extern __inline __m512d
10738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739_mm512_sub_pd (__m512d __A, __m512d __B)
10740{
d521a5b2 10741 return (__m512d) ((__v8df)__A - (__v8df)__B);
e2098065 10742}
10743
10744extern __inline __m512d
10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10747{
10748 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10749 (__v8df) __B,
10750 (__v8df) __W,
10751 (__mmask8) __U,
10752 _MM_FROUND_CUR_DIRECTION);
10753}
10754
10755extern __inline __m512d
10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10758{
10759 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10760 (__v8df) __B,
10761 (__v8df)
10762 _mm512_setzero_pd (),
10763 (__mmask8) __U,
10764 _MM_FROUND_CUR_DIRECTION);
10765}
10766
10767extern __inline __m512
10768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10769_mm512_sub_ps (__m512 __A, __m512 __B)
10770{
d521a5b2 10771 return (__m512) ((__v16sf)__A - (__v16sf)__B);
e2098065 10772}
10773
10774extern __inline __m512
10775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10777{
10778 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10779 (__v16sf) __B,
10780 (__v16sf) __W,
10781 (__mmask16) __U,
10782 _MM_FROUND_CUR_DIRECTION);
10783}
10784
10785extern __inline __m512
10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10788{
10789 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10790 (__v16sf) __B,
10791 (__v16sf)
10792 _mm512_setzero_ps (),
10793 (__mmask16) __U,
10794 _MM_FROUND_CUR_DIRECTION);
10795}
10796
10797extern __inline __m512d
10798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10799_mm512_mul_pd (__m512d __A, __m512d __B)
10800{
d521a5b2 10801 return (__m512d) ((__v8df)__A * (__v8df)__B);
e2098065 10802}
10803
10804extern __inline __m512d
10805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10807{
10808 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10809 (__v8df) __B,
10810 (__v8df) __W,
10811 (__mmask8) __U,
10812 _MM_FROUND_CUR_DIRECTION);
10813}
10814
10815extern __inline __m512d
10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10818{
10819 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10820 (__v8df) __B,
10821 (__v8df)
10822 _mm512_setzero_pd (),
10823 (__mmask8) __U,
10824 _MM_FROUND_CUR_DIRECTION);
10825}
10826
10827extern __inline __m512
10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829_mm512_mul_ps (__m512 __A, __m512 __B)
10830{
d521a5b2 10831 return (__m512) ((__v16sf)__A * (__v16sf)__B);
e2098065 10832}
10833
10834extern __inline __m512
10835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10837{
10838 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10839 (__v16sf) __B,
10840 (__v16sf) __W,
10841 (__mmask16) __U,
10842 _MM_FROUND_CUR_DIRECTION);
10843}
10844
10845extern __inline __m512
10846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10847_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10848{
10849 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10850 (__v16sf) __B,
10851 (__v16sf)
10852 _mm512_setzero_ps (),
10853 (__mmask16) __U,
10854 _MM_FROUND_CUR_DIRECTION);
10855}
10856
10857extern __inline __m512d
10858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10859_mm512_div_pd (__m512d __M, __m512d __V)
10860{
d521a5b2 10861 return (__m512d) ((__v8df)__M / (__v8df)__V);
e2098065 10862}
10863
10864extern __inline __m512d
10865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10867{
10868 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10869 (__v8df) __V,
10870 (__v8df) __W,
10871 (__mmask8) __U,
10872 _MM_FROUND_CUR_DIRECTION);
10873}
10874
10875extern __inline __m512d
10876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10878{
10879 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10880 (__v8df) __V,
10881 (__v8df)
10882 _mm512_setzero_pd (),
10883 (__mmask8) __U,
10884 _MM_FROUND_CUR_DIRECTION);
10885}
10886
10887extern __inline __m512
10888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10889_mm512_div_ps (__m512 __A, __m512 __B)
10890{
d521a5b2 10891 return (__m512) ((__v16sf)__A / (__v16sf)__B);
e2098065 10892}
10893
10894extern __inline __m512
10895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10897{
10898 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10899 (__v16sf) __B,
10900 (__v16sf) __W,
10901 (__mmask16) __U,
10902 _MM_FROUND_CUR_DIRECTION);
10903}
10904
10905extern __inline __m512
10906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10907_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10908{
10909 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10910 (__v16sf) __B,
10911 (__v16sf)
10912 _mm512_setzero_ps (),
10913 (__mmask16) __U,
10914 _MM_FROUND_CUR_DIRECTION);
10915}
10916
10917extern __inline __m512d
10918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10919_mm512_max_pd (__m512d __A, __m512d __B)
10920{
10921 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10922 (__v8df) __B,
10923 (__v8df)
0fc245cd 10924 _mm512_undefined_pd (),
e2098065 10925 (__mmask8) -1,
10926 _MM_FROUND_CUR_DIRECTION);
10927}
10928
10929extern __inline __m512d
10930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10932{
10933 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10934 (__v8df) __B,
10935 (__v8df) __W,
10936 (__mmask8) __U,
10937 _MM_FROUND_CUR_DIRECTION);
10938}
10939
10940extern __inline __m512d
10941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10943{
10944 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10945 (__v8df) __B,
10946 (__v8df)
10947 _mm512_setzero_pd (),
10948 (__mmask8) __U,
10949 _MM_FROUND_CUR_DIRECTION);
10950}
10951
10952extern __inline __m512
10953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10954_mm512_max_ps (__m512 __A, __m512 __B)
10955{
10956 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10957 (__v16sf) __B,
10958 (__v16sf)
0fc245cd 10959 _mm512_undefined_ps (),
e2098065 10960 (__mmask16) -1,
10961 _MM_FROUND_CUR_DIRECTION);
10962}
10963
10964extern __inline __m512
10965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10966_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10967{
10968 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10969 (__v16sf) __B,
10970 (__v16sf) __W,
10971 (__mmask16) __U,
10972 _MM_FROUND_CUR_DIRECTION);
10973}
10974
10975extern __inline __m512
10976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10978{
10979 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10980 (__v16sf) __B,
10981 (__v16sf)
10982 _mm512_setzero_ps (),
10983 (__mmask16) __U,
10984 _MM_FROUND_CUR_DIRECTION);
10985}
10986
10987extern __inline __m512d
10988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10989_mm512_min_pd (__m512d __A, __m512d __B)
10990{
10991 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10992 (__v8df) __B,
10993 (__v8df)
0fc245cd 10994 _mm512_undefined_pd (),
e2098065 10995 (__mmask8) -1,
10996 _MM_FROUND_CUR_DIRECTION);
10997}
10998
10999extern __inline __m512d
11000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11001_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11002{
11003 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11004 (__v8df) __B,
11005 (__v8df) __W,
11006 (__mmask8) __U,
11007 _MM_FROUND_CUR_DIRECTION);
11008}
11009
11010extern __inline __m512d
11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11013{
11014 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11015 (__v8df) __B,
11016 (__v8df)
11017 _mm512_setzero_pd (),
11018 (__mmask8) __U,
11019 _MM_FROUND_CUR_DIRECTION);
11020}
11021
11022extern __inline __m512
11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024_mm512_min_ps (__m512 __A, __m512 __B)
11025{
11026 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11027 (__v16sf) __B,
11028 (__v16sf)
0fc245cd 11029 _mm512_undefined_ps (),
e2098065 11030 (__mmask16) -1,
11031 _MM_FROUND_CUR_DIRECTION);
11032}
11033
11034extern __inline __m512
11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11037{
11038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11039 (__v16sf) __B,
11040 (__v16sf) __W,
11041 (__mmask16) __U,
11042 _MM_FROUND_CUR_DIRECTION);
11043}
11044
11045extern __inline __m512
11046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11048{
11049 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11050 (__v16sf) __B,
11051 (__v16sf)
11052 _mm512_setzero_ps (),
11053 (__mmask16) __U,
11054 _MM_FROUND_CUR_DIRECTION);
11055}
11056
11057extern __inline __m512d
11058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11059_mm512_scalef_pd (__m512d __A, __m512d __B)
11060{
11061 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11062 (__v8df) __B,
11063 (__v8df)
0fc245cd 11064 _mm512_undefined_pd (),
e2098065 11065 (__mmask8) -1,
11066 _MM_FROUND_CUR_DIRECTION);
11067}
11068
11069extern __inline __m512d
11070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11071_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11072{
11073 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11074 (__v8df) __B,
11075 (__v8df) __W,
11076 (__mmask8) __U,
11077 _MM_FROUND_CUR_DIRECTION);
11078}
11079
11080extern __inline __m512d
11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11083{
11084 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11085 (__v8df) __B,
11086 (__v8df)
11087 _mm512_setzero_pd (),
11088 (__mmask8) __U,
11089 _MM_FROUND_CUR_DIRECTION);
11090}
11091
11092extern __inline __m512
11093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11094_mm512_scalef_ps (__m512 __A, __m512 __B)
11095{
11096 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11097 (__v16sf) __B,
11098 (__v16sf)
0fc245cd 11099 _mm512_undefined_ps (),
e2098065 11100 (__mmask16) -1,
11101 _MM_FROUND_CUR_DIRECTION);
11102}
11103
11104extern __inline __m512
11105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11106_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11107{
11108 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11109 (__v16sf) __B,
11110 (__v16sf) __W,
11111 (__mmask16) __U,
11112 _MM_FROUND_CUR_DIRECTION);
11113}
11114
11115extern __inline __m512
11116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11118{
11119 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11120 (__v16sf) __B,
11121 (__v16sf)
11122 _mm512_setzero_ps (),
11123 (__mmask16) __U,
11124 _MM_FROUND_CUR_DIRECTION);
11125}
11126
0b7cc9c6 11127extern __inline __m128d
11128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11129_mm_scalef_sd (__m128d __A, __m128d __B)
11130{
11131 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11132 (__v2df) __B,
11133 _MM_FROUND_CUR_DIRECTION);
11134}
11135
11136extern __inline __m128
11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138_mm_scalef_ss (__m128 __A, __m128 __B)
11139{
11140 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11141 (__v4sf) __B,
11142 _MM_FROUND_CUR_DIRECTION);
11143}
11144
e2098065 11145extern __inline __m512d
11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11148{
11149 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11150 (__v8df) __B,
11151 (__v8df) __C,
11152 (__mmask8) -1,
11153 _MM_FROUND_CUR_DIRECTION);
11154}
11155
11156extern __inline __m512d
11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11159{
11160 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11161 (__v8df) __B,
11162 (__v8df) __C,
11163 (__mmask8) __U,
11164 _MM_FROUND_CUR_DIRECTION);
11165}
11166
11167extern __inline __m512d
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11170{
11171 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11172 (__v8df) __B,
11173 (__v8df) __C,
11174 (__mmask8) __U,
11175 _MM_FROUND_CUR_DIRECTION);
11176}
11177
11178extern __inline __m512d
11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11181{
11182 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11183 (__v8df) __B,
11184 (__v8df) __C,
11185 (__mmask8) __U,
11186 _MM_FROUND_CUR_DIRECTION);
11187}
11188
11189extern __inline __m512
11190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11192{
11193 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11194 (__v16sf) __B,
11195 (__v16sf) __C,
11196 (__mmask16) -1,
11197 _MM_FROUND_CUR_DIRECTION);
11198}
11199
11200extern __inline __m512
11201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11202_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11203{
11204 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11205 (__v16sf) __B,
11206 (__v16sf) __C,
11207 (__mmask16) __U,
11208 _MM_FROUND_CUR_DIRECTION);
11209}
11210
11211extern __inline __m512
11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11214{
11215 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11216 (__v16sf) __B,
11217 (__v16sf) __C,
11218 (__mmask16) __U,
11219 _MM_FROUND_CUR_DIRECTION);
11220}
11221
11222extern __inline __m512
11223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11225{
11226 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11227 (__v16sf) __B,
11228 (__v16sf) __C,
11229 (__mmask16) __U,
11230 _MM_FROUND_CUR_DIRECTION);
11231}
11232
11233extern __inline __m512d
11234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11236{
11237 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11238 (__v8df) __B,
11239 -(__v8df) __C,
11240 (__mmask8) -1,
11241 _MM_FROUND_CUR_DIRECTION);
11242}
11243
11244extern __inline __m512d
11245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11247{
11248 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11249 (__v8df) __B,
11250 -(__v8df) __C,
11251 (__mmask8) __U,
11252 _MM_FROUND_CUR_DIRECTION);
11253}
11254
11255extern __inline __m512d
11256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11258{
11259 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11260 (__v8df) __B,
11261 (__v8df) __C,
11262 (__mmask8) __U,
11263 _MM_FROUND_CUR_DIRECTION);
11264}
11265
11266extern __inline __m512d
11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11269{
11270 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11271 (__v8df) __B,
11272 -(__v8df) __C,
11273 (__mmask8) __U,
11274 _MM_FROUND_CUR_DIRECTION);
11275}
11276
11277extern __inline __m512
11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11280{
11281 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11282 (__v16sf) __B,
11283 -(__v16sf) __C,
11284 (__mmask16) -1,
11285 _MM_FROUND_CUR_DIRECTION);
11286}
11287
11288extern __inline __m512
11289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11291{
11292 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11293 (__v16sf) __B,
11294 -(__v16sf) __C,
11295 (__mmask16) __U,
11296 _MM_FROUND_CUR_DIRECTION);
11297}
11298
11299extern __inline __m512
11300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11302{
11303 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11304 (__v16sf) __B,
11305 (__v16sf) __C,
11306 (__mmask16) __U,
11307 _MM_FROUND_CUR_DIRECTION);
11308}
11309
11310extern __inline __m512
11311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11312_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11313{
11314 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11315 (__v16sf) __B,
11316 -(__v16sf) __C,
11317 (__mmask16) __U,
11318 _MM_FROUND_CUR_DIRECTION);
11319}
11320
11321extern __inline __m512d
11322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11323_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11324{
11325 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11326 (__v8df) __B,
11327 (__v8df) __C,
11328 (__mmask8) -1,
11329 _MM_FROUND_CUR_DIRECTION);
11330}
11331
11332extern __inline __m512d
11333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11335{
11336 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11337 (__v8df) __B,
11338 (__v8df) __C,
11339 (__mmask8) __U,
11340 _MM_FROUND_CUR_DIRECTION);
11341}
11342
11343extern __inline __m512d
11344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11345_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11346{
11347 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11348 (__v8df) __B,
11349 (__v8df) __C,
11350 (__mmask8) __U,
11351 _MM_FROUND_CUR_DIRECTION);
11352}
11353
11354extern __inline __m512d
11355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11357{
11358 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11359 (__v8df) __B,
11360 (__v8df) __C,
11361 (__mmask8) __U,
11362 _MM_FROUND_CUR_DIRECTION);
11363}
11364
11365extern __inline __m512
11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11368{
11369 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11370 (__v16sf) __B,
11371 (__v16sf) __C,
11372 (__mmask16) -1,
11373 _MM_FROUND_CUR_DIRECTION);
11374}
11375
11376extern __inline __m512
11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11379{
11380 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11381 (__v16sf) __B,
11382 (__v16sf) __C,
11383 (__mmask16) __U,
11384 _MM_FROUND_CUR_DIRECTION);
11385}
11386
11387extern __inline __m512
11388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11390{
11391 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11392 (__v16sf) __B,
11393 (__v16sf) __C,
11394 (__mmask16) __U,
11395 _MM_FROUND_CUR_DIRECTION);
11396}
11397
11398extern __inline __m512
11399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11401{
11402 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11403 (__v16sf) __B,
11404 (__v16sf) __C,
11405 (__mmask16) __U,
11406 _MM_FROUND_CUR_DIRECTION);
11407}
11408
11409extern __inline __m512d
11410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11412{
11413 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11414 (__v8df) __B,
11415 -(__v8df) __C,
11416 (__mmask8) -1,
11417 _MM_FROUND_CUR_DIRECTION);
11418}
11419
11420extern __inline __m512d
11421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11423{
11424 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11425 (__v8df) __B,
11426 -(__v8df) __C,
11427 (__mmask8) __U,
11428 _MM_FROUND_CUR_DIRECTION);
11429}
11430
11431extern __inline __m512d
11432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11434{
11435 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11436 (__v8df) __B,
11437 (__v8df) __C,
11438 (__mmask8) __U,
11439 _MM_FROUND_CUR_DIRECTION);
11440}
11441
11442extern __inline __m512d
11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11445{
11446 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11447 (__v8df) __B,
11448 -(__v8df) __C,
11449 (__mmask8) __U,
11450 _MM_FROUND_CUR_DIRECTION);
11451}
11452
11453extern __inline __m512
11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11456{
11457 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11458 (__v16sf) __B,
11459 -(__v16sf) __C,
11460 (__mmask16) -1,
11461 _MM_FROUND_CUR_DIRECTION);
11462}
11463
11464extern __inline __m512
11465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11467{
11468 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11469 (__v16sf) __B,
11470 -(__v16sf) __C,
11471 (__mmask16) __U,
11472 _MM_FROUND_CUR_DIRECTION);
11473}
11474
11475extern __inline __m512
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11478{
11479 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11480 (__v16sf) __B,
11481 (__v16sf) __C,
11482 (__mmask16) __U,
11483 _MM_FROUND_CUR_DIRECTION);
11484}
11485
11486extern __inline __m512
11487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11488_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11489{
11490 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11491 (__v16sf) __B,
11492 -(__v16sf) __C,
11493 (__mmask16) __U,
11494 _MM_FROUND_CUR_DIRECTION);
11495}
11496
11497extern __inline __m512d
11498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11499_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11500{
11501 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11502 (__v8df) __B,
11503 (__v8df) __C,
11504 (__mmask8) -1,
11505 _MM_FROUND_CUR_DIRECTION);
11506}
11507
11508extern __inline __m512d
11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11511{
11512 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11513 (__v8df) __B,
11514 (__v8df) __C,
11515 (__mmask8) __U,
11516 _MM_FROUND_CUR_DIRECTION);
11517}
11518
11519extern __inline __m512d
11520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11522{
11523 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11524 (__v8df) __B,
11525 (__v8df) __C,
11526 (__mmask8) __U,
11527 _MM_FROUND_CUR_DIRECTION);
11528}
11529
11530extern __inline __m512d
11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11533{
11534 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11535 (__v8df) __B,
11536 (__v8df) __C,
11537 (__mmask8) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11539}
11540
11541extern __inline __m512
11542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11544{
11545 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11546 (__v16sf) __B,
11547 (__v16sf) __C,
11548 (__mmask16) -1,
11549 _MM_FROUND_CUR_DIRECTION);
11550}
11551
11552extern __inline __m512
11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11555{
11556 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11557 (__v16sf) __B,
11558 (__v16sf) __C,
11559 (__mmask16) __U,
11560 _MM_FROUND_CUR_DIRECTION);
11561}
11562
11563extern __inline __m512
11564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11566{
11567 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11568 (__v16sf) __B,
11569 (__v16sf) __C,
11570 (__mmask16) __U,
11571 _MM_FROUND_CUR_DIRECTION);
11572}
11573
11574extern __inline __m512
11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11577{
11578 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11579 (__v16sf) __B,
11580 (__v16sf) __C,
11581 (__mmask16) __U,
11582 _MM_FROUND_CUR_DIRECTION);
11583}
11584
11585extern __inline __m512d
11586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11588{
11589 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11590 (__v8df) __B,
11591 -(__v8df) __C,
11592 (__mmask8) -1,
11593 _MM_FROUND_CUR_DIRECTION);
11594}
11595
11596extern __inline __m512d
11597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11599{
11600 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11601 (__v8df) __B,
11602 (__v8df) __C,
11603 (__mmask8) __U,
11604 _MM_FROUND_CUR_DIRECTION);
11605}
11606
11607extern __inline __m512d
11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11610{
11611 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11612 (__v8df) __B,
11613 (__v8df) __C,
11614 (__mmask8) __U,
11615 _MM_FROUND_CUR_DIRECTION);
11616}
11617
11618extern __inline __m512d
11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11621{
11622 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11623 (__v8df) __B,
11624 -(__v8df) __C,
11625 (__mmask8) __U,
11626 _MM_FROUND_CUR_DIRECTION);
11627}
11628
11629extern __inline __m512
11630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11632{
11633 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11634 (__v16sf) __B,
11635 -(__v16sf) __C,
11636 (__mmask16) -1,
11637 _MM_FROUND_CUR_DIRECTION);
11638}
11639
11640extern __inline __m512
11641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11643{
11644 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11645 (__v16sf) __B,
11646 (__v16sf) __C,
11647 (__mmask16) __U,
11648 _MM_FROUND_CUR_DIRECTION);
11649}
11650
11651extern __inline __m512
11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11654{
11655 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11656 (__v16sf) __B,
11657 (__v16sf) __C,
11658 (__mmask16) __U,
11659 _MM_FROUND_CUR_DIRECTION);
11660}
11661
11662extern __inline __m512
11663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11664_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11665{
11666 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11667 (__v16sf) __B,
11668 -(__v16sf) __C,
11669 (__mmask16) __U,
11670 _MM_FROUND_CUR_DIRECTION);
11671}
11672
11673extern __inline __m256i
11674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11675_mm512_cvttpd_epi32 (__m512d __A)
11676{
11677 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11678 (__v8si)
0fc245cd 11679 _mm256_undefined_si256 (),
e2098065 11680 (__mmask8) -1,
11681 _MM_FROUND_CUR_DIRECTION);
11682}
11683
11684extern __inline __m256i
11685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11686_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11687{
11688 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11689 (__v8si) __W,
11690 (__mmask8) __U,
11691 _MM_FROUND_CUR_DIRECTION);
11692}
11693
11694extern __inline __m256i
11695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11696_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11697{
11698 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11699 (__v8si)
11700 _mm256_setzero_si256 (),
11701 (__mmask8) __U,
11702 _MM_FROUND_CUR_DIRECTION);
11703}
11704
11705extern __inline __m256i
11706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707_mm512_cvttpd_epu32 (__m512d __A)
11708{
11709 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11710 (__v8si)
0fc245cd 11711 _mm256_undefined_si256 (),
e2098065 11712 (__mmask8) -1,
11713 _MM_FROUND_CUR_DIRECTION);
11714}
11715
11716extern __inline __m256i
11717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11719{
11720 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11721 (__v8si) __W,
11722 (__mmask8) __U,
11723 _MM_FROUND_CUR_DIRECTION);
11724}
11725
11726extern __inline __m256i
11727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11728_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11729{
11730 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11731 (__v8si)
11732 _mm256_setzero_si256 (),
11733 (__mmask8) __U,
11734 _MM_FROUND_CUR_DIRECTION);
11735}
11736
11737extern __inline __m256i
11738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739_mm512_cvtpd_epi32 (__m512d __A)
11740{
11741 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11742 (__v8si)
0fc245cd 11743 _mm256_undefined_si256 (),
e2098065 11744 (__mmask8) -1,
11745 _MM_FROUND_CUR_DIRECTION);
11746}
11747
11748extern __inline __m256i
11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11751{
11752 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11753 (__v8si) __W,
11754 (__mmask8) __U,
11755 _MM_FROUND_CUR_DIRECTION);
11756}
11757
11758extern __inline __m256i
11759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11760_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11761{
11762 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11763 (__v8si)
11764 _mm256_setzero_si256 (),
11765 (__mmask8) __U,
11766 _MM_FROUND_CUR_DIRECTION);
11767}
11768
11769extern __inline __m256i
11770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11771_mm512_cvtpd_epu32 (__m512d __A)
11772{
11773 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11774 (__v8si)
0fc245cd 11775 _mm256_undefined_si256 (),
e2098065 11776 (__mmask8) -1,
11777 _MM_FROUND_CUR_DIRECTION);
11778}
11779
11780extern __inline __m256i
11781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11783{
11784 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11785 (__v8si) __W,
11786 (__mmask8) __U,
11787 _MM_FROUND_CUR_DIRECTION);
11788}
11789
11790extern __inline __m256i
11791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11793{
11794 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11795 (__v8si)
11796 _mm256_setzero_si256 (),
11797 (__mmask8) __U,
11798 _MM_FROUND_CUR_DIRECTION);
11799}
11800
11801extern __inline __m512i
11802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803_mm512_cvttps_epi32 (__m512 __A)
11804{
11805 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11806 (__v16si)
0fc245cd 11807 _mm512_undefined_si512 (),
e2098065 11808 (__mmask16) -1,
11809 _MM_FROUND_CUR_DIRECTION);
11810}
11811
11812extern __inline __m512i
11813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11815{
11816 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11817 (__v16si) __W,
11818 (__mmask16) __U,
11819 _MM_FROUND_CUR_DIRECTION);
11820}
11821
11822extern __inline __m512i
11823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11825{
11826 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11827 (__v16si)
11828 _mm512_setzero_si512 (),
11829 (__mmask16) __U,
11830 _MM_FROUND_CUR_DIRECTION);
11831}
11832
11833extern __inline __m512i
11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835_mm512_cvttps_epu32 (__m512 __A)
11836{
11837 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11838 (__v16si)
0fc245cd 11839 _mm512_undefined_si512 (),
e2098065 11840 (__mmask16) -1,
11841 _MM_FROUND_CUR_DIRECTION);
11842}
11843
11844extern __inline __m512i
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11847{
11848 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11849 (__v16si) __W,
11850 (__mmask16) __U,
11851 _MM_FROUND_CUR_DIRECTION);
11852}
11853
11854extern __inline __m512i
11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11857{
11858 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11859 (__v16si)
11860 _mm512_setzero_si512 (),
11861 (__mmask16) __U,
11862 _MM_FROUND_CUR_DIRECTION);
11863}
11864
11865extern __inline __m512i
11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867_mm512_cvtps_epi32 (__m512 __A)
11868{
11869 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11870 (__v16si)
0fc245cd 11871 _mm512_undefined_si512 (),
e2098065 11872 (__mmask16) -1,
11873 _MM_FROUND_CUR_DIRECTION);
11874}
11875
11876extern __inline __m512i
11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11879{
11880 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11881 (__v16si) __W,
11882 (__mmask16) __U,
11883 _MM_FROUND_CUR_DIRECTION);
11884}
11885
11886extern __inline __m512i
11887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11889{
11890 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11891 (__v16si)
11892 _mm512_setzero_si512 (),
11893 (__mmask16) __U,
11894 _MM_FROUND_CUR_DIRECTION);
11895}
11896
11897extern __inline __m512i
11898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11899_mm512_cvtps_epu32 (__m512 __A)
11900{
11901 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11902 (__v16si)
0fc245cd 11903 _mm512_undefined_si512 (),
e2098065 11904 (__mmask16) -1,
11905 _MM_FROUND_CUR_DIRECTION);
11906}
11907
11908extern __inline __m512i
11909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11911{
11912 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11913 (__v16si) __W,
11914 (__mmask16) __U,
11915 _MM_FROUND_CUR_DIRECTION);
11916}
11917
11918extern __inline __m512i
11919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11921{
11922 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11923 (__v16si)
11924 _mm512_setzero_si512 (),
11925 (__mmask16) __U,
11926 _MM_FROUND_CUR_DIRECTION);
11927}
11928
11929#ifdef __x86_64__
11930extern __inline __m128
11931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11933{
11934 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11935 _MM_FROUND_CUR_DIRECTION);
11936}
11937
11938extern __inline __m128d
11939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11941{
11942 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11943 _MM_FROUND_CUR_DIRECTION);
11944}
11945#endif
11946
11947extern __inline __m128
11948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949_mm_cvtu32_ss (__m128 __A, unsigned __B)
11950{
11951 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11952 _MM_FROUND_CUR_DIRECTION);
11953}
11954
11955extern __inline __m512
11956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957_mm512_cvtepi32_ps (__m512i __A)
11958{
11959 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11960 (__v16sf)
0fc245cd 11961 _mm512_undefined_ps (),
e2098065 11962 (__mmask16) -1,
11963 _MM_FROUND_CUR_DIRECTION);
11964}
11965
11966extern __inline __m512
11967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11969{
11970 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11971 (__v16sf) __W,
11972 (__mmask16) __U,
11973 _MM_FROUND_CUR_DIRECTION);
11974}
11975
11976extern __inline __m512
11977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11979{
11980 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11981 (__v16sf)
11982 _mm512_setzero_ps (),
11983 (__mmask16) __U,
11984 _MM_FROUND_CUR_DIRECTION);
11985}
11986
11987extern __inline __m512
11988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989_mm512_cvtepu32_ps (__m512i __A)
11990{
11991 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11992 (__v16sf)
0fc245cd 11993 _mm512_undefined_ps (),
e2098065 11994 (__mmask16) -1,
11995 _MM_FROUND_CUR_DIRECTION);
11996}
11997
11998extern __inline __m512
11999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12001{
12002 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12003 (__v16sf) __W,
12004 (__mmask16) __U,
12005 _MM_FROUND_CUR_DIRECTION);
12006}
12007
12008extern __inline __m512
12009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12011{
12012 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12013 (__v16sf)
12014 _mm512_setzero_ps (),
12015 (__mmask16) __U,
12016 _MM_FROUND_CUR_DIRECTION);
12017}
12018
12019#ifdef __OPTIMIZE__
12020extern __inline __m512d
12021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12023{
12024 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12025 (__v8df) __B,
12026 (__v8di) __C,
12027 __imm,
12028 (__mmask8) -1,
12029 _MM_FROUND_CUR_DIRECTION);
12030}
12031
12032extern __inline __m512d
12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12035 __m512i __C, const int __imm)
12036{
12037 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12038 (__v8df) __B,
12039 (__v8di) __C,
12040 __imm,
12041 (__mmask8) __U,
12042 _MM_FROUND_CUR_DIRECTION);
12043}
12044
12045extern __inline __m512d
12046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12048 __m512i __C, const int __imm)
12049{
12050 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12051 (__v8df) __B,
12052 (__v8di) __C,
12053 __imm,
12054 (__mmask8) __U,
12055 _MM_FROUND_CUR_DIRECTION);
12056}
12057
12058extern __inline __m512
12059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12061{
12062 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12063 (__v16sf) __B,
12064 (__v16si) __C,
12065 __imm,
12066 (__mmask16) -1,
12067 _MM_FROUND_CUR_DIRECTION);
12068}
12069
12070extern __inline __m512
12071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12073 __m512i __C, const int __imm)
12074{
12075 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12076 (__v16sf) __B,
12077 (__v16si) __C,
12078 __imm,
12079 (__mmask16) __U,
12080 _MM_FROUND_CUR_DIRECTION);
12081}
12082
12083extern __inline __m512
12084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12085_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12086 __m512i __C, const int __imm)
12087{
12088 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12089 (__v16sf) __B,
12090 (__v16si) __C,
12091 __imm,
12092 (__mmask16) __U,
12093 _MM_FROUND_CUR_DIRECTION);
12094}
12095
12096extern __inline __m128d
12097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12099{
12100 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12101 (__v2df) __B,
12102 (__v2di) __C, __imm,
12103 (__mmask8) -1,
12104 _MM_FROUND_CUR_DIRECTION);
12105}
12106
12107extern __inline __m128d
12108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12110 __m128i __C, const int __imm)
12111{
12112 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12113 (__v2df) __B,
12114 (__v2di) __C, __imm,
12115 (__mmask8) __U,
12116 _MM_FROUND_CUR_DIRECTION);
12117}
12118
12119extern __inline __m128d
12120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12122 __m128i __C, const int __imm)
12123{
12124 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12125 (__v2df) __B,
12126 (__v2di) __C,
12127 __imm,
12128 (__mmask8) __U,
12129 _MM_FROUND_CUR_DIRECTION);
12130}
12131
12132extern __inline __m128
12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12135{
12136 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12137 (__v4sf) __B,
12138 (__v4si) __C, __imm,
12139 (__mmask8) -1,
12140 _MM_FROUND_CUR_DIRECTION);
12141}
12142
12143extern __inline __m128
12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12146 __m128i __C, const int __imm)
12147{
12148 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12149 (__v4sf) __B,
12150 (__v4si) __C, __imm,
12151 (__mmask8) __U,
12152 _MM_FROUND_CUR_DIRECTION);
12153}
12154
12155extern __inline __m128
12156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12158 __m128i __C, const int __imm)
12159{
12160 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12161 (__v4sf) __B,
12162 (__v4si) __C, __imm,
12163 (__mmask8) __U,
12164 _MM_FROUND_CUR_DIRECTION);
12165}
12166#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with an all-ones mask.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12171
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with U as the mask.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12176
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_maskz with U as the mask.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12181
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_mask with an all-ones mask.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12186
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_mask with U as the mask.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12191
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_maskz with U as the mask.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12196
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_mask with an all-ones mask.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12201
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_mask with U as the mask.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12206
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_maskz with U as the mask.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12211
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_mask with an all-ones mask.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
12216
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_mask with U as the mask.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12221
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_maskz with U as the mask.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12226#endif
12227
12228#ifdef __x86_64__
12229extern __inline unsigned long long
12230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12231_mm_cvtss_u64 (__m128 __A)
12232{
12233 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12234 __A,
12235 _MM_FROUND_CUR_DIRECTION);
12236}
12237
12238extern __inline unsigned long long
12239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12240_mm_cvttss_u64 (__m128 __A)
12241{
12242 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12243 __A,
12244 _MM_FROUND_CUR_DIRECTION);
12245}
12246
12247extern __inline long long
12248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12249_mm_cvttss_i64 (__m128 __A)
12250{
12251 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12252 _MM_FROUND_CUR_DIRECTION);
12253}
12254#endif /* __x86_64__ */
12255
12256extern __inline unsigned
12257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258_mm_cvtss_u32 (__m128 __A)
12259{
12260 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12261 _MM_FROUND_CUR_DIRECTION);
12262}
12263
12264extern __inline unsigned
12265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12266_mm_cvttss_u32 (__m128 __A)
12267{
12268 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12269 _MM_FROUND_CUR_DIRECTION);
12270}
12271
12272extern __inline int
12273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12274_mm_cvttss_i32 (__m128 __A)
12275{
12276 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12277 _MM_FROUND_CUR_DIRECTION);
12278}
12279
12280#ifdef __x86_64__
12281extern __inline unsigned long long
12282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283_mm_cvtsd_u64 (__m128d __A)
12284{
12285 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12286 __A,
12287 _MM_FROUND_CUR_DIRECTION);
12288}
12289
12290extern __inline unsigned long long
12291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292_mm_cvttsd_u64 (__m128d __A)
12293{
12294 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12295 __A,
12296 _MM_FROUND_CUR_DIRECTION);
12297}
12298
12299extern __inline long long
12300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301_mm_cvttsd_i64 (__m128d __A)
12302{
12303 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12304 _MM_FROUND_CUR_DIRECTION);
12305}
12306#endif /* __x86_64__ */
12307
12308extern __inline unsigned
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm_cvtsd_u32 (__m128d __A)
12311{
12312 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12313 _MM_FROUND_CUR_DIRECTION);
12314}
12315
12316extern __inline unsigned
12317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318_mm_cvttsd_u32 (__m128d __A)
12319{
12320 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12321 _MM_FROUND_CUR_DIRECTION);
12322}
12323
12324extern __inline int
12325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12326_mm_cvttsd_i32 (__m128d __A)
12327{
12328 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12329 _MM_FROUND_CUR_DIRECTION);
12330}
12331
12332extern __inline __m512d
12333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334_mm512_cvtps_pd (__m256 __A)
12335{
12336 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12337 (__v8df)
0fc245cd 12338 _mm512_undefined_pd (),
e2098065 12339 (__mmask8) -1,
12340 _MM_FROUND_CUR_DIRECTION);
12341}
12342
12343extern __inline __m512d
12344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12346{
12347 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12348 (__v8df) __W,
12349 (__mmask8) __U,
12350 _MM_FROUND_CUR_DIRECTION);
12351}
12352
12353extern __inline __m512d
12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12356{
12357 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12358 (__v8df)
12359 _mm512_setzero_pd (),
12360 (__mmask8) __U,
12361 _MM_FROUND_CUR_DIRECTION);
12362}
12363
12364extern __inline __m512
12365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366_mm512_cvtph_ps (__m256i __A)
12367{
12368 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12369 (__v16sf)
0fc245cd 12370 _mm512_undefined_ps (),
e2098065 12371 (__mmask16) -1,
12372 _MM_FROUND_CUR_DIRECTION);
12373}
12374
12375extern __inline __m512
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12378{
12379 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12380 (__v16sf) __W,
12381 (__mmask16) __U,
12382 _MM_FROUND_CUR_DIRECTION);
12383}
12384
12385extern __inline __m512
12386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12387_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12388{
12389 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12390 (__v16sf)
12391 _mm512_setzero_ps (),
12392 (__mmask16) __U,
12393 _MM_FROUND_CUR_DIRECTION);
12394}
12395
12396extern __inline __m256
12397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398_mm512_cvtpd_ps (__m512d __A)
12399{
12400 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12401 (__v8sf)
0fc245cd 12402 _mm256_undefined_ps (),
e2098065 12403 (__mmask8) -1,
12404 _MM_FROUND_CUR_DIRECTION);
12405}
12406
12407extern __inline __m256
12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12410{
12411 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12412 (__v8sf) __W,
12413 (__mmask8) __U,
12414 _MM_FROUND_CUR_DIRECTION);
12415}
12416
12417extern __inline __m256
12418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12419_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12420{
12421 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12422 (__v8sf)
12423 _mm256_setzero_ps (),
12424 (__mmask8) __U,
12425 _MM_FROUND_CUR_DIRECTION);
12426}
12427
12428#ifdef __OPTIMIZE__
12429extern __inline __m512
12430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12431_mm512_getexp_ps (__m512 __A)
12432{
12433 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12434 (__v16sf)
0fc245cd 12435 _mm512_undefined_ps (),
e2098065 12436 (__mmask16) -1,
12437 _MM_FROUND_CUR_DIRECTION);
12438}
12439
12440extern __inline __m512
12441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12442_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12443{
12444 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12445 (__v16sf) __W,
12446 (__mmask16) __U,
12447 _MM_FROUND_CUR_DIRECTION);
12448}
12449
12450extern __inline __m512
12451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12452_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12453{
12454 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12455 (__v16sf)
12456 _mm512_setzero_ps (),
12457 (__mmask16) __U,
12458 _MM_FROUND_CUR_DIRECTION);
12459}
12460
12461extern __inline __m512d
12462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12463_mm512_getexp_pd (__m512d __A)
12464{
12465 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12466 (__v8df)
0fc245cd 12467 _mm512_undefined_pd (),
e2098065 12468 (__mmask8) -1,
12469 _MM_FROUND_CUR_DIRECTION);
12470}
12471
12472extern __inline __m512d
12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12475{
12476 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12477 (__v8df) __W,
12478 (__mmask8) __U,
12479 _MM_FROUND_CUR_DIRECTION);
12480}
12481
12482extern __inline __m512d
12483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12484_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12485{
12486 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12487 (__v8df)
12488 _mm512_setzero_pd (),
12489 (__mmask8) __U,
12490 _MM_FROUND_CUR_DIRECTION);
12491}
12492
0b7cc9c6 12493extern __inline __m128
12494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12495_mm_getexp_ss (__m128 __A, __m128 __B)
12496{
12497 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12498 (__v4sf) __B,
12499 _MM_FROUND_CUR_DIRECTION);
12500}
12501
12502extern __inline __m128d
12503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12504_mm_getexp_sd (__m128d __A, __m128d __B)
12505{
12506 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12507 (__v2df) __B,
12508 _MM_FROUND_CUR_DIRECTION);
12509}
12510
e2098065 12511extern __inline __m512d
12512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12513_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12514 _MM_MANTISSA_SIGN_ENUM __C)
12515{
12516 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12517 (__C << 2) | __B,
0fc245cd 12518 _mm512_undefined_pd (),
e2098065 12519 (__mmask8) -1,
12520 _MM_FROUND_CUR_DIRECTION);
12521}
12522
12523extern __inline __m512d
12524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12526 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12527{
12528 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12529 (__C << 2) | __B,
12530 (__v8df) __W, __U,
12531 _MM_FROUND_CUR_DIRECTION);
12532}
12533
12534extern __inline __m512d
12535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12536_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12537 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12538{
12539 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12540 (__C << 2) | __B,
12541 (__v8df)
12542 _mm512_setzero_pd (),
12543 __U,
12544 _MM_FROUND_CUR_DIRECTION);
12545}
12546
12547extern __inline __m512
12548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12549_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12550 _MM_MANTISSA_SIGN_ENUM __C)
12551{
12552 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12553 (__C << 2) | __B,
0fc245cd 12554 _mm512_undefined_ps (),
e2098065 12555 (__mmask16) -1,
12556 _MM_FROUND_CUR_DIRECTION);
12557}
12558
12559extern __inline __m512
12560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12561_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12562 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12563{
12564 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12565 (__C << 2) | __B,
12566 (__v16sf) __W, __U,
12567 _MM_FROUND_CUR_DIRECTION);
12568}
12569
12570extern __inline __m512
12571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12572_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12573 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12574{
12575 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12576 (__C << 2) | __B,
12577 (__v16sf)
12578 _mm512_setzero_ps (),
12579 __U,
12580 _MM_FROUND_CUR_DIRECTION);
12581}
12582
0b7cc9c6 12583extern __inline __m128d
12584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12585_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12586 _MM_MANTISSA_SIGN_ENUM __D)
12587{
12588 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12589 (__v2df) __B,
12590 (__D << 2) | __C,
12591 _MM_FROUND_CUR_DIRECTION);
12592}
12593
12594extern __inline __m128
12595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12596_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12597 _MM_MANTISSA_SIGN_ENUM __D)
12598{
12599 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12600 (__v4sf) __B,
12601 (__D << 2) | __C,
12602 _MM_FROUND_CUR_DIRECTION);
12603}
12604
e2098065 12605#else
/* Macro forms of the 512-bit getmant_pd intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_getmantpd512_mask with the immediate ((C)<<2) | (B).  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit getmant_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_getmantps512_mask with the immediate ((C)<<2) | (B).  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__
   is not defined.  Both pass the immediate ((D)<<2) | (C) and
   _MM_FROUND_CUR_DIRECTION to the corresponding *_round builtin.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128)__builtin_ia32_getmantss_round ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), \
      _MM_FROUND_CUR_DIRECTION))
12657
/* Macro form of _mm_getexp_ss, used when __OPTIMIZE__ is not defined.
   It must expand to the same builtin as the inline definition of
   _mm_getexp_ss, namely __builtin_ia32_getexpss128_round, which takes the
   two __v4sf operands followed by the rounding argument.  */
#define _mm_getexp_ss(A, B) \
  ((__m128)__builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             _MM_FROUND_CUR_DIRECTION))
12661
/* Macro form of _mm_getexp_sd, used when __OPTIMIZE__ is not defined.
   It must expand to the same builtin as the inline definition of
   _mm_getexp_sd, namely __builtin_ia32_getexpsd128_round, which takes the
   two __v2df operands followed by the rounding argument.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d)__builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              _MM_FROUND_CUR_DIRECTION))
12665
/* Macro forms of the 512-bit getexp_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_getexpps512_mask with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getexp_ps(A) \
  ((__m512)__builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
12677
/* Macro forms of the 512-bit getexp_pd intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_getexppd512_mask with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getexp_pd(A) \
  ((__m512d)__builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
12689#endif
12690
12691#ifdef __OPTIMIZE__
12692extern __inline __m512
12693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12694_mm512_roundscale_ps (__m512 __A, const int __imm)
12695{
12696 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0fc245cd 12697 (__v16sf)
12698 _mm512_undefined_ps (),
12699 -1,
e2098065 12700 _MM_FROUND_CUR_DIRECTION);
12701}
12702
12703extern __inline __m512
12704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12705_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12706 const int __imm)
12707{
12708 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12709 (__v16sf) __A,
12710 (__mmask16) __B,
12711 _MM_FROUND_CUR_DIRECTION);
12712}
12713
12714extern __inline __m512
12715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12716_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12717{
12718 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12719 __imm,
12720 (__v16sf)
12721 _mm512_setzero_ps (),
12722 (__mmask16) __A,
12723 _MM_FROUND_CUR_DIRECTION);
12724}
12725
12726extern __inline __m512d
12727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12728_mm512_roundscale_pd (__m512d __A, const int __imm)
12729{
12730 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0fc245cd 12731 (__v8df)
12732 _mm512_undefined_pd (),
12733 -1,
e2098065 12734 _MM_FROUND_CUR_DIRECTION);
12735}
12736
12737extern __inline __m512d
12738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12739_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12740 const int __imm)
12741{
12742 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12743 (__v8df) __A,
12744 (__mmask8) __B,
12745 _MM_FROUND_CUR_DIRECTION);
12746}
12747
12748extern __inline __m512d
12749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12750_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12751{
12752 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12753 __imm,
12754 (__v8df)
12755 _mm512_setzero_pd (),
12756 (__mmask8) __A,
12757 _MM_FROUND_CUR_DIRECTION);
12758}
12759
0b7cc9c6 12760extern __inline __m128
12761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12763{
12764 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12765 (__v4sf) __B, __imm,
12766 _MM_FROUND_CUR_DIRECTION);
12767}
12768
12769extern __inline __m128d
12770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12771_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12772{
12773 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12774 (__v2df) __B, __imm,
12775 _MM_FROUND_CUR_DIRECTION);
12776}
12777
e2098065 12778#else
/* Macro forms of the 512-bit roundscale_ps intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_rndscaleps_mask with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)(-1), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(C), \
      (int)(D), \
      (__v16sf)(__m512)(A), \
      (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit roundscale_pd intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to
   __builtin_ia32_rndscalepd_mask with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(C), \
      (int)(D), \
      (__v8df)(__m512d)(A), \
      (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar roundscale intrinsics, used when __OPTIMIZE__
   is not defined.  Both pass (int)(C) as the immediate and
   _MM_FROUND_CUR_DIRECTION to the corresponding *_round builtin.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
e2098065 12811#endif
12812
12813#ifdef __OPTIMIZE__
12814extern __inline __mmask8
12815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12817{
12818 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12819 (__v8df) __Y, __P,
12820 (__mmask8) -1,
12821 _MM_FROUND_CUR_DIRECTION);
12822}
12823
12824extern __inline __mmask16
12825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12826_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12827{
12828 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12829 (__v16sf) __Y, __P,
12830 (__mmask16) -1,
12831 _MM_FROUND_CUR_DIRECTION);
12832}
12833
12834extern __inline __mmask16
12835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12836_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12837{
12838 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12839 (__v16sf) __Y, __P,
12840 (__mmask16) __U,
12841 _MM_FROUND_CUR_DIRECTION);
12842}
12843
12844extern __inline __mmask8
12845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12847{
12848 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12849 (__v8df) __Y, __P,
12850 (__mmask8) __U,
12851 _MM_FROUND_CUR_DIRECTION);
12852}
12853
12854extern __inline __mmask8
12855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12856_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12857{
12858 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12859 (__v2df) __Y, __P,
12860 (__mmask8) -1,
12861 _MM_FROUND_CUR_DIRECTION);
12862}
12863
12864extern __inline __mmask8
12865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12866_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12867{
12868 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12869 (__v2df) __Y, __P,
12870 (__mmask8) __M,
12871 _MM_FROUND_CUR_DIRECTION);
12872}
12873
12874extern __inline __mmask8
12875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12876_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12877{
12878 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12879 (__v4sf) __Y, __P,
12880 (__mmask8) -1,
12881 _MM_FROUND_CUR_DIRECTION);
12882}
12883
12884extern __inline __mmask8
12885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12886_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12887{
12888 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12889 (__v4sf) __Y, __P,
12890 (__mmask8) __M,
12891 _MM_FROUND_CUR_DIRECTION);
12892}
12893
12894#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmppd512_mask with an all-ones __mmask8.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
12899
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpps512_mask with an all-ones __mmask16.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (int)(P), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))
12904
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmppd512_mask with M as the mask.  M is parenthesized
   before the cast so that an expression argument is converted as a whole,
   as every other macro argument here is.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), \
                                            _MM_FROUND_CUR_DIRECTION))
12909
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpps512_mask with M as the mask.  M is parenthesized
   before the cast so that an expression argument is converted as a whole,
   as every other macro argument here is.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), \
                                             _MM_FROUND_CUR_DIRECTION))
12914
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpsd_mask with an all-ones __mmask8.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
12919
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpsd_mask with M as the mask.  M is parenthesized and
   cast to __mmask8, matching the (__mmask8) __M cast in the inline
   definition of _mm_mask_cmp_sd_mask.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12924
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpss_mask with an all-ones __mmask8.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
12929
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpss_mask with M as the mask.  M is parenthesized and
   cast to __mmask8, matching the (__mmask8) __M cast in the inline
   definition of _mm_mask_cmp_ss_mask.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
12934#endif
12935
765faa80 12936extern __inline __mmask16
12937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12938_mm512_kmov (__mmask16 __A)
12939{
12940 return __builtin_ia32_kmov16 (__A);
12941}
12942
889d21f6 12943extern __inline __m512
12944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12945_mm512_castpd_ps (__m512d __A)
12946{
12947 return (__m512) (__A);
12948}
12949
12950extern __inline __m512i
12951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12952_mm512_castpd_si512 (__m512d __A)
12953{
12954 return (__m512i) (__A);
12955}
12956
12957extern __inline __m512d
12958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12959_mm512_castps_pd (__m512 __A)
12960{
12961 return (__m512d) (__A);
12962}
12963
12964extern __inline __m512i
12965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12966_mm512_castps_si512 (__m512 __A)
12967{
12968 return (__m512i) (__A);
12969}
12970
12971extern __inline __m512
12972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12973_mm512_castsi512_ps (__m512i __A)
12974{
12975 return (__m512) (__A);
12976}
12977
12978extern __inline __m512d
12979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12980_mm512_castsi512_pd (__m512i __A)
12981{
12982 return (__m512d) (__A);
12983}
12984
12985extern __inline __m128d
12986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12987_mm512_castpd512_pd128 (__m512d __A)
12988{
12989 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
12990}
12991
12992extern __inline __m128
12993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12994_mm512_castps512_ps128 (__m512 __A)
12995{
12996 return _mm512_extractf32x4_ps(__A, 0);
12997}
12998
12999extern __inline __m128i
13000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13001_mm512_castsi512_si128 (__m512i __A)
13002{
13003 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13004}
13005
13006extern __inline __m256d
13007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13008_mm512_castpd512_pd256 (__m512d __A)
13009{
13010 return _mm512_extractf64x4_pd(__A, 0);
13011}
13012
13013extern __inline __m256
13014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13015_mm512_castps512_ps256 (__m512 __A)
13016{
13017 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13018}
13019
13020extern __inline __m256i
13021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13022_mm512_castsi512_si256 (__m512i __A)
13023{
13024 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13025}
13026
13027extern __inline __m512d
13028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13029_mm512_castpd128_pd512 (__m128d __A)
13030{
13031 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13032}
13033
13034extern __inline __m512
13035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13036_mm512_castps128_ps512 (__m128 __A)
13037{
13038 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13039}
13040
13041extern __inline __m512i
13042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13043_mm512_castsi128_si512 (__m128i __A)
13044{
13045 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13046}
13047
13048extern __inline __m512d
13049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13050_mm512_castpd256_pd512 (__m256d __A)
13051{
13052 return __builtin_ia32_pd512_256pd (__A);
13053}
13054
13055extern __inline __m512
13056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13057_mm512_castps256_ps512 (__m256 __A)
13058{
13059 return __builtin_ia32_ps512_256ps (__A);
13060}
13061
13062extern __inline __m512i
13063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13064_mm512_castsi256_si512 (__m256i __A)
13065{
13066 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13067}
13068
13069extern __inline __mmask16
13070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13071_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13072{
13073 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13074 (__v16si) __B, 0,
13075 (__mmask16) -1);
13076}
13077
13078extern __inline __mmask16
13079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13080_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13081{
13082 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13083 (__v16si) __B, 0, __U);
13084}
13085
13086extern __inline __mmask8
13087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13088_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13089{
13090 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13091 (__v8di) __B, 0, __U);
13092}
13093
13094extern __inline __mmask8
13095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13096_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13097{
13098 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13099 (__v8di) __B, 0,
13100 (__mmask8) -1);
13101}
13102
13103extern __inline __mmask16
13104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13105_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13106{
13107 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13108 (__v16si) __B, 6,
13109 (__mmask16) -1);
13110}
13111
13112extern __inline __mmask16
13113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13115{
13116 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13117 (__v16si) __B, 6, __U);
13118}
13119
13120extern __inline __mmask8
13121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13122_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13123{
13124 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13125 (__v8di) __B, 6, __U);
13126}
13127
13128extern __inline __mmask8
13129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13130_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13131{
13132 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13133 (__v8di) __B, 6,
13134 (__mmask8) -1);
13135}
13136
e2098065 13137#ifdef __DISABLE_AVX512F__
13138#undef __DISABLE_AVX512F__
13139#pragma GCC pop_options
13140#endif /* __DISABLE_AVX512F__ */
13141
13142#endif /* _AVX512FINTRIN_H_INCLUDED */