]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
AVX512F: Add helper enumeration for ternary logic intrinsics.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
7adcbafe 1/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Element-typed 64-byte vector types.  They are used only inside this
   header to cast operands for the builtins.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 64-byte vector types.  They carry __may_alias__ because the
   Intel API lets them alias other vector types and their scalar
   components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* The same three public types, but declared with __aligned__ (1) so
   they may be used for unaligned accesses.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Mask types: an unsigned char for 8-element operations and an
   unsigned short for 16-element operations.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
4e6a811f
JJ
100extern __inline __m512i
101__attribute__((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
103 short __q27, short __q26, short __q25, short __q24,
104 short __q23, short __q22, short __q21, short __q20,
105 short __q19, short __q18, short __q17, short __q16,
106 short __q15, short __q14, short __q13, short __q12,
107 short __q11, short __q10, short __q09, short __q08,
108 short __q07, short __q06, short __q05, short __q04,
109 short __q03, short __q02, short __q01, short __q00)
110{
111 return __extension__ (__m512i)(__v32hi){
112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
116 };
117}
118
119extern __inline __m512i
120__attribute__((__gnu_inline__, __always_inline__, __artificial__))
121_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
122 char __q59, char __q58, char __q57, char __q56,
123 char __q55, char __q54, char __q53, char __q52,
124 char __q51, char __q50, char __q49, char __q48,
125 char __q47, char __q46, char __q45, char __q44,
126 char __q43, char __q42, char __q41, char __q40,
127 char __q39, char __q38, char __q37, char __q36,
128 char __q35, char __q34, char __q33, char __q32,
129 char __q31, char __q30, char __q29, char __q28,
130 char __q27, char __q26, char __q25, char __q24,
131 char __q23, char __q22, char __q21, char __q20,
132 char __q19, char __q18, char __q17, char __q16,
133 char __q15, char __q14, char __q13, char __q12,
134 char __q11, char __q10, char __q09, char __q08,
135 char __q07, char __q06, char __q05, char __q04,
136 char __q03, char __q02, char __q01, char __q00)
137{
138 return __extension__ (__m512i)(__v64qi){
139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
147 };
148}
149
756c5857
AI
150extern __inline __m512d
151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152_mm512_set_pd (double __A, double __B, double __C, double __D,
153 double __E, double __F, double __G, double __H)
154{
155 return __extension__ (__m512d)
156 { __H, __G, __F, __E, __D, __C, __B, __A };
157}
158
159extern __inline __m512
160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161_mm512_set_ps (float __A, float __B, float __C, float __D,
162 float __E, float __F, float __G, float __H,
163 float __I, float __J, float __K, float __L,
164 float __M, float __N, float __O, float __P)
165{
166 return __extension__ (__m512)
167 { __P, __O, __N, __M, __L, __K, __J, __I,
168 __H, __G, __F, __E, __D, __C, __B, __A };
169}
170
/* "Reversed" constructors.  Each macro forwards to the matching
   _mm512_set_* function with its argument list reversed, so the first
   macro argument becomes the last function argument.  */
#define _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7)		      \
  _mm512_set_epi64 (v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7,		      \
			  v8, v9, v10, v11, v12, v13, v14, v15)		      \
  _mm512_set_epi32 (v15, v14, v13, v12, v11, v10, v9, v8,		      \
		    v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7)			      \
  _mm512_set_pd (v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7,			      \
		       v8, v9, v10, v11, v12, v13, v14, v15)		      \
  _mm512_set_ps (v15, v14, v13, v12, v11, v10, v9, v8,			      \
		 v7, v6, v5, v4, v3, v2, v1, v0)
183
0b192937
UD
184extern __inline __m512
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm512_undefined_ps (void)
187{
188 __m512 __Y = __Y;
189 return __Y;
190}
191
dcb2c527
JJ
192#define _mm512_undefined _mm512_undefined_ps
193
0b192937
UD
194extern __inline __m512d
195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
196_mm512_undefined_pd (void)
197{
198 __m512d __Y = __Y;
199 return __Y;
200}
201
202extern __inline __m512i
203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 204_mm512_undefined_epi32 (void)
0b192937
UD
205{
206 __m512i __Y = __Y;
207 return __Y;
208}
209
4271e5cb
UB
210#define _mm512_undefined_si512 _mm512_undefined_epi32
211
7d9088c2
UD
212extern __inline __m512i
213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214_mm512_set1_epi8 (char __A)
215{
216 return __extension__ (__m512i)(__v64qi)
217 { __A, __A, __A, __A, __A, __A, __A, __A,
218 __A, __A, __A, __A, __A, __A, __A, __A,
219 __A, __A, __A, __A, __A, __A, __A, __A,
220 __A, __A, __A, __A, __A, __A, __A, __A,
221 __A, __A, __A, __A, __A, __A, __A, __A,
222 __A, __A, __A, __A, __A, __A, __A, __A,
223 __A, __A, __A, __A, __A, __A, __A, __A,
224 __A, __A, __A, __A, __A, __A, __A, __A };
225}
226
227extern __inline __m512i
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm512_set1_epi16 (short __A)
230{
231 return __extension__ (__m512i)(__v32hi)
232 { __A, __A, __A, __A, __A, __A, __A, __A,
233 __A, __A, __A, __A, __A, __A, __A, __A,
234 __A, __A, __A, __A, __A, __A, __A, __A,
235 __A, __A, __A, __A, __A, __A, __A, __A };
236}
237
2b2384e8
UD
238extern __inline __m512d
239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240_mm512_set1_pd (double __A)
241{
43373412 242 return __extension__ (__m512d)(__v8df)
243 { __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
244}
245
246extern __inline __m512
247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
248_mm512_set1_ps (float __A)
249{
43373412 250 return __extension__ (__m512)(__v16sf)
251 { __A, __A, __A, __A, __A, __A, __A, __A,
252 __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
253}
254
7d9088c2
UD
255/* Create the vector [A B C D A B C D A B C D A B C D]. */
256extern __inline __m512i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
259{
260 return __extension__ (__m512i)(__v16si)
261 { __D, __C, __B, __A, __D, __C, __B, __A,
262 __D, __C, __B, __A, __D, __C, __B, __A };
263}
264
265extern __inline __m512i
266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267_mm512_set4_epi64 (long long __A, long long __B, long long __C,
268 long long __D)
269{
270 return __extension__ (__m512i) (__v8di)
271 { __D, __C, __B, __A, __D, __C, __B, __A };
272}
273
274extern __inline __m512d
275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276_mm512_set4_pd (double __A, double __B, double __C, double __D)
277{
278 return __extension__ (__m512d)
279 { __D, __C, __B, __A, __D, __C, __B, __A };
280}
281
282extern __inline __m512
283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
284_mm512_set4_ps (float __A, float __B, float __C, float __D)
285{
286 return __extension__ (__m512)
287 { __D, __C, __B, __A, __D, __C, __B, __A,
288 __D, __C, __B, __A, __D, __C, __B, __A };
289}
290
/* "Reversed" four-element constructors.  Each macro forwards to the
   matching _mm512_set4_* function with its four arguments reversed.  */
#define _mm512_setr4_epi64(v0, v1, v2, v3)				      \
  _mm512_set4_epi64 (v3, v2, v1, v0)

#define _mm512_setr4_epi32(v0, v1, v2, v3)				      \
  _mm512_set4_epi32 (v3, v2, v1, v0)

#define _mm512_setr4_pd(v0, v1, v2, v3)					      \
  _mm512_set4_pd (v3, v2, v1, v0)

#define _mm512_setr4_ps(v0, v1, v2, v3)					      \
  _mm512_set4_ps (v3, v2, v1, v0)
302
756c5857
AI
303extern __inline __m512
304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
305_mm512_setzero_ps (void)
306{
307 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
308 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
309}
310
4e6a811f
JJ
311extern __inline __m512
312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313_mm512_setzero (void)
314{
315 return _mm512_setzero_ps ();
316}
317
756c5857
AI
318extern __inline __m512d
319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320_mm512_setzero_pd (void)
321{
322 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
323}
324
7d9088c2
UD
325extern __inline __m512i
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_setzero_epi32 (void)
328{
329 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
330}
331
756c5857
AI
332extern __inline __m512i
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm512_setzero_si512 (void)
335{
336 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
337}
338
339extern __inline __m512d
340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
342{
343 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
344 (__v8df) __W,
345 (__mmask8) __U);
346}
347
348extern __inline __m512d
349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
350_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
351{
352 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
353 (__v8df)
354 _mm512_setzero_pd (),
355 (__mmask8) __U);
356}
357
358extern __inline __m512
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
361{
362 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
363 (__v16sf) __W,
364 (__mmask16) __U);
365}
366
367extern __inline __m512
368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
370{
371 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
372 (__v16sf)
373 _mm512_setzero_ps (),
374 (__mmask16) __U);
375}
376
377extern __inline __m512d
378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
379_mm512_load_pd (void const *__P)
380{
381 return *(__m512d *) __P;
382}
383
384extern __inline __m512d
385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
387{
388 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
389 (__v8df) __W,
390 (__mmask8) __U);
391}
392
393extern __inline __m512d
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
396{
397 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
398 (__v8df)
399 _mm512_setzero_pd (),
400 (__mmask8) __U);
401}
402
403extern __inline void
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm512_store_pd (void *__P, __m512d __A)
406{
407 *(__m512d *) __P = __A;
408}
409
410extern __inline void
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
413{
414 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
415 (__mmask8) __U);
416}
417
418extern __inline __m512
419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
420_mm512_load_ps (void const *__P)
421{
422 return *(__m512 *) __P;
423}
424
425extern __inline __m512
426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
427_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
428{
429 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
430 (__v16sf) __W,
431 (__mmask16) __U);
432}
433
434extern __inline __m512
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
437{
438 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
439 (__v16sf)
440 _mm512_setzero_ps (),
441 (__mmask16) __U);
442}
443
444extern __inline void
445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446_mm512_store_ps (void *__P, __m512 __A)
447{
448 *(__m512 *) __P = __A;
449}
450
451extern __inline void
452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
454{
455 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
456 (__mmask16) __U);
457}
458
459extern __inline __m512i
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
462{
463 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
464 (__v8di) __W,
465 (__mmask8) __U);
466}
467
468extern __inline __m512i
469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
471{
472 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
473 (__v8di)
474 _mm512_setzero_si512 (),
475 (__mmask8) __U);
476}
477
478extern __inline __m512i
479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480_mm512_load_epi64 (void const *__P)
481{
482 return *(__m512i *) __P;
483}
484
485extern __inline __m512i
486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
488{
489 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
490 (__v8di) __W,
491 (__mmask8) __U);
492}
493
494extern __inline __m512i
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
497{
498 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
499 (__v8di)
500 _mm512_setzero_si512 (),
501 (__mmask8) __U);
502}
503
504extern __inline void
505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
506_mm512_store_epi64 (void *__P, __m512i __A)
507{
508 *(__m512i *) __P = __A;
509}
510
511extern __inline void
512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
513_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
514{
515 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
516 (__mmask8) __U);
517}
518
519extern __inline __m512i
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
522{
523 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
524 (__v16si) __W,
525 (__mmask16) __U);
526}
527
528extern __inline __m512i
529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
531{
532 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
533 (__v16si)
534 _mm512_setzero_si512 (),
535 (__mmask16) __U);
536}
537
538extern __inline __m512i
539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540_mm512_load_si512 (void const *__P)
541{
542 return *(__m512i *) __P;
543}
544
545extern __inline __m512i
546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547_mm512_load_epi32 (void const *__P)
548{
549 return *(__m512i *) __P;
550}
551
552extern __inline __m512i
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
555{
556 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
557 (__v16si) __W,
558 (__mmask16) __U);
559}
560
561extern __inline __m512i
562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
563_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
564{
565 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
566 (__v16si)
567 _mm512_setzero_si512 (),
568 (__mmask16) __U);
569}
570
571extern __inline void
572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573_mm512_store_si512 (void *__P, __m512i __A)
574{
575 *(__m512i *) __P = __A;
576}
577
578extern __inline void
579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580_mm512_store_epi32 (void *__P, __m512i __A)
581{
582 *(__m512i *) __P = __A;
583}
584
585extern __inline void
586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
587_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
588{
589 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
590 (__mmask16) __U);
591}
592
593extern __inline __m512i
594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
595_mm512_mullo_epi32 (__m512i __A, __m512i __B)
596{
2069d6fc 597 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
598}
599
600extern __inline __m512i
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
603{
604 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
605 (__v16si) __B,
606 (__v16si)
607 _mm512_setzero_si512 (),
608 __M);
609}
610
611extern __inline __m512i
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
614{
615 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
616 (__v16si) __B,
617 (__v16si) __W, __M);
618}
619
503ac4e0
JJ
620extern __inline __m512i
621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622_mm512_mullox_epi64 (__m512i __A, __m512i __B)
623{
624 return (__m512i) ((__v8du) __A * (__v8du) __B);
625}
626
627extern __inline __m512i
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
630{
631 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
632}
633
756c5857
AI
634extern __inline __m512i
635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
637{
638 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
639 (__v16si) __Y,
640 (__v16si)
4271e5cb 641 _mm512_undefined_epi32 (),
756c5857
AI
642 (__mmask16) -1);
643}
644
645extern __inline __m512i
646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
647_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
648{
649 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
650 (__v16si) __Y,
651 (__v16si) __W,
652 (__mmask16) __U);
653}
654
655extern __inline __m512i
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
658{
659 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
660 (__v16si) __Y,
661 (__v16si)
662 _mm512_setzero_si512 (),
663 (__mmask16) __U);
664}
665
666extern __inline __m512i
667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668_mm512_srav_epi32 (__m512i __X, __m512i __Y)
669{
670 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
671 (__v16si) __Y,
672 (__v16si)
4271e5cb 673 _mm512_undefined_epi32 (),
756c5857
AI
674 (__mmask16) -1);
675}
676
677extern __inline __m512i
678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
679_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
680{
681 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
682 (__v16si) __Y,
683 (__v16si) __W,
684 (__mmask16) __U);
685}
686
687extern __inline __m512i
688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
690{
691 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
692 (__v16si) __Y,
693 (__v16si)
694 _mm512_setzero_si512 (),
695 (__mmask16) __U);
696}
697
698extern __inline __m512i
699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
700_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
701{
702 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
703 (__v16si) __Y,
704 (__v16si)
4271e5cb 705 _mm512_undefined_epi32 (),
756c5857
AI
706 (__mmask16) -1);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
712{
713 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
714 (__v16si) __Y,
715 (__v16si) __W,
716 (__mmask16) __U);
717}
718
719extern __inline __m512i
720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
722{
723 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
724 (__v16si) __Y,
725 (__v16si)
726 _mm512_setzero_si512 (),
727 (__mmask16) __U);
728}
729
730extern __inline __m512i
731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
732_mm512_add_epi64 (__m512i __A, __m512i __B)
733{
2069d6fc 734 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
735}
736
737extern __inline __m512i
738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
740{
741 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
742 (__v8di) __B,
743 (__v8di) __W,
744 (__mmask8) __U);
745}
746
747extern __inline __m512i
748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
750{
751 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
752 (__v8di) __B,
753 (__v8di)
754 _mm512_setzero_si512 (),
755 (__mmask8) __U);
756}
757
758extern __inline __m512i
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm512_sub_epi64 (__m512i __A, __m512i __B)
761{
2069d6fc 762 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
763}
764
765extern __inline __m512i
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
768{
769 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
770 (__v8di) __B,
771 (__v8di) __W,
772 (__mmask8) __U);
773}
774
775extern __inline __m512i
776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
778{
779 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
780 (__v8di) __B,
781 (__v8di)
782 _mm512_setzero_si512 (),
783 (__mmask8) __U);
784}
785
786extern __inline __m512i
787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
789{
790 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
791 (__v8di) __Y,
792 (__v8di)
0b192937 793 _mm512_undefined_pd (),
756c5857
AI
794 (__mmask8) -1);
795}
796
797extern __inline __m512i
798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
799_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
800{
801 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
802 (__v8di) __Y,
803 (__v8di) __W,
804 (__mmask8) __U);
805}
806
807extern __inline __m512i
808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
810{
811 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
812 (__v8di) __Y,
813 (__v8di)
814 _mm512_setzero_si512 (),
815 (__mmask8) __U);
816}
817
818extern __inline __m512i
819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820_mm512_srav_epi64 (__m512i __X, __m512i __Y)
821{
822 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
823 (__v8di) __Y,
824 (__v8di)
4271e5cb 825 _mm512_undefined_epi32 (),
756c5857
AI
826 (__mmask8) -1);
827}
828
829extern __inline __m512i
830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
831_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
832{
833 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
834 (__v8di) __Y,
835 (__v8di) __W,
836 (__mmask8) __U);
837}
838
839extern __inline __m512i
840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
842{
843 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
844 (__v8di) __Y,
845 (__v8di)
846 _mm512_setzero_si512 (),
847 (__mmask8) __U);
848}
849
850extern __inline __m512i
851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
852_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
853{
854 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
855 (__v8di) __Y,
856 (__v8di)
4271e5cb 857 _mm512_undefined_epi32 (),
756c5857
AI
858 (__mmask8) -1);
859}
860
861extern __inline __m512i
862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
864{
865 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
866 (__v8di) __Y,
867 (__v8di) __W,
868 (__mmask8) __U);
869}
870
871extern __inline __m512i
872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
874{
875 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
876 (__v8di) __Y,
877 (__v8di)
878 _mm512_setzero_si512 (),
879 (__mmask8) __U);
880}
881
882extern __inline __m512i
883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884_mm512_add_epi32 (__m512i __A, __m512i __B)
885{
2069d6fc 886 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
887}
888
889extern __inline __m512i
890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
892{
893 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
894 (__v16si) __B,
895 (__v16si) __W,
896 (__mmask16) __U);
897}
898
899extern __inline __m512i
900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
902{
903 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
904 (__v16si) __B,
905 (__v16si)
906 _mm512_setzero_si512 (),
907 (__mmask16) __U);
908}
909
910extern __inline __m512i
911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
912_mm512_mul_epi32 (__m512i __X, __m512i __Y)
913{
914 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
915 (__v16si) __Y,
916 (__v8di)
4271e5cb 917 _mm512_undefined_epi32 (),
756c5857
AI
918 (__mmask8) -1);
919}
920
921extern __inline __m512i
922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
923_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
924{
925 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
926 (__v16si) __Y,
927 (__v8di) __W, __M);
928}
929
930extern __inline __m512i
931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
932_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
933{
934 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
935 (__v16si) __Y,
936 (__v8di)
937 _mm512_setzero_si512 (),
938 __M);
939}
940
941extern __inline __m512i
942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
943_mm512_sub_epi32 (__m512i __A, __m512i __B)
944{
2069d6fc 945 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
946}
947
948extern __inline __m512i
949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
951{
952 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
953 (__v16si) __B,
954 (__v16si) __W,
955 (__mmask16) __U);
956}
957
958extern __inline __m512i
959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
961{
962 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
963 (__v16si) __B,
964 (__v16si)
965 _mm512_setzero_si512 (),
966 (__mmask16) __U);
967}
968
969extern __inline __m512i
970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971_mm512_mul_epu32 (__m512i __X, __m512i __Y)
972{
973 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
974 (__v16si) __Y,
975 (__v8di)
4271e5cb 976 _mm512_undefined_epi32 (),
756c5857
AI
977 (__mmask8) -1);
978}
979
980extern __inline __m512i
981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
982_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
983{
984 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
985 (__v16si) __Y,
986 (__v8di) __W, __M);
987}
988
989extern __inline __m512i
990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
991_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
992{
993 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
994 (__v16si) __Y,
995 (__v8di)
996 _mm512_setzero_si512 (),
997 __M);
998}
999
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_psllqi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __src,
                                                  (__mmask8) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask (__val, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_slli_epi64(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask8) -1))

#define _mm512_mask_slli_epi64(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i) (W),               \
                                  (__mmask8) (U)))

#define _mm512_maskz_slli_epi64(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask8) (U)))
#endif
1046
1047extern __inline __m512i
1048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1049_mm512_sll_epi64 (__m512i __A, __m128i __B)
1050{
1051 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1052 (__v2di) __B,
1053 (__v8di)
4271e5cb 1054 _mm512_undefined_epi32 (),
756c5857
AI
1055 (__mmask8) -1);
1056}
1057
1058extern __inline __m512i
1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1061{
1062 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1063 (__v2di) __B,
1064 (__v8di) __W,
1065 (__mmask8) __U);
1066}
1067
1068extern __inline __m512i
1069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1070_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1071{
1072 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1073 (__v2di) __B,
1074 (__v8di)
1075 _mm512_setzero_si512 (),
1076 (__mmask8) __U);
1077}
1078
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_psrlqi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __src,
                                                  (__mmask8) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__val, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_srli_epi64(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask8) -1))

#define _mm512_mask_srli_epi64(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i) (W),               \
                                  (__mmask8) (U)))

#define _mm512_maskz_srli_epi64(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask8) (U)))
#endif
1125
1126extern __inline __m512i
1127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128_mm512_srl_epi64 (__m512i __A, __m128i __B)
1129{
1130 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1131 (__v2di) __B,
1132 (__v8di)
4271e5cb 1133 _mm512_undefined_epi32 (),
756c5857
AI
1134 (__mmask8) -1);
1135}
1136
1137extern __inline __m512i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1140{
1141 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1142 (__v2di) __B,
1143 (__v8di) __W,
1144 (__mmask8) __U);
1145}
1146
1147extern __inline __m512i
1148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1150{
1151 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1152 (__v2di) __B,
1153 (__v8di)
1154 _mm512_setzero_si512 (),
1155 (__mmask8) __U);
1156}
1157
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_psraqi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __src,
                                                  (__mmask8) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __val = (__v8di) __A;
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psraqi512_mask (__val, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_srai_epi64(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask8) -1))

#define _mm512_mask_srai_epi64(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i) (W),               \
                                  (__mmask8) (U)))

#define _mm512_maskz_srai_epi64(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (X),               \
                                  (int) (C),                            \
                                  (__v8di) (__m512i)                    \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask8) (U)))
#endif
1204
1205extern __inline __m512i
1206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207_mm512_sra_epi64 (__m512i __A, __m128i __B)
1208{
1209 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1210 (__v2di) __B,
1211 (__v8di)
4271e5cb 1212 _mm512_undefined_epi32 (),
756c5857
AI
1213 (__mmask8) -1);
1214}
1215
1216extern __inline __m512i
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1219{
1220 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1221 (__v2di) __B,
1222 (__v8di) __W,
1223 (__mmask8) __U);
1224}
1225
1226extern __inline __m512i
1227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1229{
1230 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1231 (__v2di) __B,
1232 (__v8di)
1233 _mm512_setzero_si512 (),
1234 (__mmask8) __U);
1235}
1236
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_pslldi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __src,
                                                  (__mmask16) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __val = (__v16si) __A;
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_pslldi512_mask (__val, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_slli_epi32(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask16) -1))

#define _mm512_mask_slli_epi32(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i) (W),              \
                                  (__mmask16) (U)))

#define _mm512_maskz_slli_epi32(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask16) (U)))
#endif
1283
1284extern __inline __m512i
1285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1286_mm512_sll_epi32 (__m512i __A, __m128i __B)
1287{
1288 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1289 (__v4si) __B,
1290 (__v16si)
4271e5cb 1291 _mm512_undefined_epi32 (),
756c5857
AI
1292 (__mmask16) -1);
1293}
1294
1295extern __inline __m512i
1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1298{
1299 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1300 (__v4si) __B,
1301 (__v16si) __W,
1302 (__mmask16) __U);
1303}
1304
1305extern __inline __m512i
1306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1308{
1309 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1310 (__v4si) __B,
1311 (__v16si)
1312 _mm512_setzero_si512 (),
1313 (__mmask16) __U);
1314}
1315
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_psrldi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __src,
                                                  (__mmask16) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  const __v16si __val = (__v16si) __A;
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psrldi512_mask (__val, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_srli_epi32(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask16) -1))

#define _mm512_mask_srli_epi32(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i) (W),              \
                                  (__mmask16) (U)))

#define _mm512_maskz_srli_epi32(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask16) (U)))
#endif
1362
1363extern __inline __m512i
1364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1365_mm512_srl_epi32 (__m512i __A, __m128i __B)
1366{
1367 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1368 (__v4si) __B,
1369 (__v16si)
4271e5cb 1370 _mm512_undefined_epi32 (),
756c5857
AI
1371 (__mmask16) -1);
1372}
1373
1374extern __inline __m512i
1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1377{
1378 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1379 (__v4si) __B,
1380 (__v16si) __W,
1381 (__mmask16) __U);
1382}
1383
1384extern __inline __m512i
1385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1386_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1387{
1388 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1389 (__v4si) __B,
1390 (__v16si)
1391 _mm512_setzero_si512 (),
1392 (__mmask16) __U);
1393}
1394
#ifdef __OPTIMIZE__
/* Inline-function forms of the __builtin_ia32_psradi512_mask wrappers.
   The count __B is handed to the builtin untouched as its second
   operand.  */

/* Unmasked: third operand is _mm512_undefined_epi32 (), mask is
   (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __src,
                                                  (__mmask16) -1);
}

/* Merge form: third operand is __W, mask is __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __val = (__v16si) __A;
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psradi512_mask (__val, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-source form: third operand is _mm512_setzero_si512 (), mask is
   __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined -- presumably so the
   count reaches the builtin as a literal constant.  Each expands to the
   same builtin call as the like-named function, with the count converted
   through (int).  */
#define _mm512_srai_epi32(X, C)                                         \
  ((__m512i)                                                            \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_undefined_epi32 (),            \
                                  (__mmask16) -1))

#define _mm512_mask_srai_epi32(W, U, X, C)                              \
  ((__m512i)                                                            \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i) (W),              \
                                  (__mmask16) (U)))

#define _mm512_maskz_srai_epi32(U, X, C)                                \
  ((__m512i)                                                            \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (X),              \
                                  (int) (C),                            \
                                  (__v16si) (__m512i)                   \
                                  _mm512_setzero_si512 (),              \
                                  (__mmask16) (U)))
#endif
1441
1442extern __inline __m512i
1443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1444_mm512_sra_epi32 (__m512i __A, __m128i __B)
1445{
1446 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1447 (__v4si) __B,
1448 (__v16si)
4271e5cb 1449 _mm512_undefined_epi32 (),
756c5857
AI
1450 (__mmask16) -1);
1451}
1452
1453extern __inline __m512i
1454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1456{
1457 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1458 (__v4si) __B,
1459 (__v16si) __W,
1460 (__mmask16) __U);
1461}
1462
1463extern __inline __m512i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1466{
1467 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1468 (__v4si) __B,
1469 (__v16si)
1470 _mm512_setzero_si512 (),
1471 (__mmask16) __U);
1472}
1473
075691af
AI
#ifdef __OPTIMIZE__
/* Scalar add/sub wrappers that take an explicit rounding operand.  In every
   function below __R is forwarded unchanged as the builtin's last argument.
   The "mask" forms pass __W as the third vector operand; the "maskz" forms
   pass a _mm_setzero_pd () / _mm_setzero_ps () vector instead.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Every expansion is
   enclosed in its own parentheses so that the leading cast and the builtin
   call stay one primary expression at the use site: without them a postfix
   operator applied to the macro result (for instance a vector subscript)
   would bind to the call before the cast.  Arguments are parenthesized as
   well.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((A), (B), (C)))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((A), (B), (C)))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((A), (B), (C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((A), (B), (C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))

#endif
1641
50d9ca71
HW
/* Named constants for the three inputs A, B and C of a ternary logic
   operation.  Bit i of each constant equals that input's bit in the 3-bit
   index i = (a << 2) | (b << 1) | c, so combining the constants with C
   bitwise operators yields an 8-bit table (presumably intended as the
   __imm argument of the ternarylogic intrinsics -- e.g.
   _MM_TERNLOG_A & _MM_TERNLOG_B is 0xC0).  */
typedef enum
{
  _MM_TERNLOG_A = 0xF0,   /* 1111 0000 */
  _MM_TERNLOG_B = 0xCC,   /* 1100 1100 */
  _MM_TERNLOG_C = 0xAA    /* 1010 1010 */
} _MM_TERNLOG_ENUM;
1650
756c5857
AI
#ifdef __OPTIMIZE__
/* Inline-function forms of the ternary-logic wrappers.  Each passes __A,
   __B and __C as the three vector operands and __imm, narrowed with an
   (unsigned char) cast, as the fourth operand.  The epi64 forms use
   __v8di/__mmask8 and the pternlogq builtins; the epi32 forms use
   __v16si/__mmask16 and the pternlogd builtins.  */

/* Unmasked: the mask operand is (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c,
                                                     (unsigned char) __imm,
                                                     (__mmask8) -1);
}

/* Masked through the _mask builtin with __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c,
                                                     (unsigned char) __imm,
                                                     (__mmask8) __U);
}

/* Masked through the separate _maskz builtin with __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_maskz (__a, __b, __c,
                                                      (unsigned char) __imm,
                                                      (__mmask8) __U);
}

/* Unmasked: the mask operand is (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c,
                                                     (unsigned char) __imm,
                                                     (__mmask16) -1);
}

/* Masked through the _mask builtin with __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c,
                                                     (unsigned char) __imm,
                                                     (__mmask16) __U);
}

/* Masked through the separate _maskz builtin with __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_maskz (__a, __b, __c,
                                                      (unsigned char) __imm,
                                                      (__mmask16) __U);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to the
   same builtin call as the like-named function, with I converted through
   (unsigned char).  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di) (__m512i) (A),                                            \
     (__v8di) (__m512i) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (unsigned char) (I),                                               \
     (__mmask8) -1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di) (__m512i) (A),                                            \
     (__v8di) (__m512i) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (unsigned char) (I),                                               \
     (__mmask8) (U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
     (__v8di) (__m512i) (A),                                            \
     (__v8di) (__m512i) (B),                                            \
     (__v8di) (__m512i) (C),                                            \
     (unsigned char) (I),                                               \
     (__mmask8) (U)))

#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si) (__m512i) (A),                                           \
     (__v16si) (__m512i) (B),                                           \
     (__v16si) (__m512i) (C),                                           \
     (unsigned char) (I),                                               \
     (__mmask16) -1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si) (__m512i) (A),                                           \
     (__v16si) (__m512i) (B),                                           \
     (__v16si) (__m512i) (C),                                           \
     (unsigned char) (I),                                               \
     (__mmask16) (U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
     (__v16si) (__m512i) (A),                                           \
     (__v16si) (__m512i) (B),                                           \
     (__v16si) (__m512i) (C),                                           \
     (unsigned char) (I),                                               \
     (__mmask16) (U)))
#endif
1773
1774extern __inline __m512d
1775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776_mm512_rcp14_pd (__m512d __A)
1777{
1778 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1779 (__v8df)
0b192937 1780 _mm512_undefined_pd (),
756c5857
AI
1781 (__mmask8) -1);
1782}
1783
1784extern __inline __m512d
1785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1787{
1788 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1789 (__v8df) __W,
1790 (__mmask8) __U);
1791}
1792
1793extern __inline __m512d
1794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1795_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1796{
1797 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1798 (__v8df)
1799 _mm512_setzero_pd (),
1800 (__mmask8) __U);
1801}
1802
1803extern __inline __m512
1804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805_mm512_rcp14_ps (__m512 __A)
1806{
1807 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1808 (__v16sf)
0b192937 1809 _mm512_undefined_ps (),
756c5857
AI
1810 (__mmask16) -1);
1811}
1812
1813extern __inline __m512
1814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1816{
1817 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1818 (__v16sf) __W,
1819 (__mmask16) __U);
1820}
1821
1822extern __inline __m512
1823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1825{
1826 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1827 (__v16sf)
1828 _mm512_setzero_ps (),
1829 (__mmask16) __U);
1830}
1831
075691af
AI
1832extern __inline __m128d
1833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834_mm_rcp14_sd (__m128d __A, __m128d __B)
1835{
df62b4af
IT
1836 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1837 (__v2df) __A);
075691af
AI
1838}
1839
f4ee3a9e
UB
1840extern __inline __m128d
1841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1843{
1844 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1845 (__v2df) __A,
1846 (__v2df) __W,
1847 (__mmask8) __U);
1848}
1849
1850extern __inline __m128d
1851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1852_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1853{
1854 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1855 (__v2df) __A,
1856 (__v2df) _mm_setzero_ps (),
1857 (__mmask8) __U);
1858}
1859
075691af
AI
1860extern __inline __m128
1861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1862_mm_rcp14_ss (__m128 __A, __m128 __B)
1863{
df62b4af
IT
1864 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1865 (__v4sf) __A);
075691af
AI
1866}
1867
f4ee3a9e
UB
1868extern __inline __m128
1869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1870_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1871{
1872 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1873 (__v4sf) __A,
1874 (__v4sf) __W,
1875 (__mmask8) __U);
1876}
1877
1878extern __inline __m128
1879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1880_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1881{
1882 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1883 (__v4sf) __A,
1884 (__v4sf) _mm_setzero_ps (),
1885 (__mmask8) __U);
1886}
1887
756c5857
AI
1888extern __inline __m512d
1889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1890_mm512_rsqrt14_pd (__m512d __A)
1891{
1892 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1893 (__v8df)
0b192937 1894 _mm512_undefined_pd (),
756c5857
AI
1895 (__mmask8) -1);
1896}
1897
1898extern __inline __m512d
1899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1901{
1902 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1903 (__v8df) __W,
1904 (__mmask8) __U);
1905}
1906
1907extern __inline __m512d
1908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1910{
1911 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1912 (__v8df)
1913 _mm512_setzero_pd (),
1914 (__mmask8) __U);
1915}
1916
1917extern __inline __m512
1918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919_mm512_rsqrt14_ps (__m512 __A)
1920{
1921 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1922 (__v16sf)
0b192937 1923 _mm512_undefined_ps (),
756c5857
AI
1924 (__mmask16) -1);
1925}
1926
1927extern __inline __m512
1928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1929_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1930{
1931 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1932 (__v16sf) __W,
1933 (__mmask16) __U);
1934}
1935
1936extern __inline __m512
1937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1938_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1939{
1940 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1941 (__v16sf)
1942 _mm512_setzero_ps (),
1943 (__mmask16) __U);
1944}
1945
075691af
AI
1946extern __inline __m128d
1947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1949{
df62b4af
IT
1950 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1951 (__v2df) __A);
075691af
AI
1952}
1953
d7a33a4c
JK
1954extern __inline __m128d
1955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1957{
1958 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1959 (__v2df) __A,
1960 (__v2df) __W,
1961 (__mmask8) __U);
1962}
1963
1964extern __inline __m128d
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1967{
1968 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1969 (__v2df) __A,
1970 (__v2df) _mm_setzero_pd (),
1971 (__mmask8) __U);
1972}
1973
075691af
AI
1974extern __inline __m128
1975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1977{
df62b4af
IT
1978 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1979 (__v4sf) __A);
075691af
AI
1980}
1981
d7a33a4c
JK
1982extern __inline __m128
1983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1984_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1985{
1986 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1987 (__v4sf) __A,
1988 (__v4sf) __W,
1989 (__mmask8) __U);
1990}
1991
1992extern __inline __m128
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1995{
1996 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1997 (__v4sf) __A,
1998 (__v4sf) _mm_setzero_ps (),
1999 (__mmask8) __U);
2000}
2001
756c5857
AI
2002#ifdef __OPTIMIZE__
2003extern __inline __m512d
2004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005_mm512_sqrt_round_pd (__m512d __A, const int __R)
2006{
2007 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2008 (__v8df)
0b192937 2009 _mm512_undefined_pd (),
756c5857
AI
2010 (__mmask8) -1, __R);
2011}
2012
2013extern __inline __m512d
2014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2015_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2016 const int __R)
2017{
2018 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2019 (__v8df) __W,
2020 (__mmask8) __U, __R);
2021}
2022
2023extern __inline __m512d
2024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2025_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
2026{
2027 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2028 (__v8df)
2029 _mm512_setzero_pd (),
2030 (__mmask8) __U, __R);
2031}
2032
2033extern __inline __m512
2034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2035_mm512_sqrt_round_ps (__m512 __A, const int __R)
2036{
2037 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2038 (__v16sf)
0b192937 2039 _mm512_undefined_ps (),
756c5857
AI
2040 (__mmask16) -1, __R);
2041}
2042
2043extern __inline __m512
2044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2045_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2046{
2047 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2048 (__v16sf) __W,
2049 (__mmask16) __U, __R);
2050}
2051
2052extern __inline __m512
2053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2054_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2055{
2056 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2057 (__v16sf)
2058 _mm512_setzero_ps (),
2059 (__mmask16) __U, __R);
2060}
2061
075691af
AI
2062extern __inline __m128d
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2065{
b10bc0d6
OM
2066 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2067 (__v2df) __A,
2068 (__v2df)
2069 _mm_setzero_pd (),
2070 (__mmask8) -1, __R);
2071}
2072
2073extern __inline __m128d
2074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2076 const int __R)
2077{
2078 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2079 (__v2df) __A,
2080 (__v2df) __W,
2081 (__mmask8) __U, __R);
2082}
2083
2084extern __inline __m128d
2085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2087{
2088 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2089 (__v2df) __A,
2090 (__v2df)
2091 _mm_setzero_pd (),
2092 (__mmask8) __U, __R);
075691af
AI
2093}
2094
2095extern __inline __m128
2096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2097_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2098{
b10bc0d6
OM
2099 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2100 (__v4sf) __A,
2101 (__v4sf)
2102 _mm_setzero_ps (),
2103 (__mmask8) -1, __R);
2104}
2105
2106extern __inline __m128
2107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2108_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2109 const int __R)
2110{
2111 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2112 (__v4sf) __A,
2113 (__v4sf) __W,
2114 (__mmask8) __U, __R);
2115}
2116
2117extern __inline __m128
2118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2120{
2121 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2122 (__v4sf) __A,
2123 (__v4sf)
2124 _mm_setzero_ps (),
2125 (__mmask8) __U, __R);
075691af 2126}
756c5857
AI
2127#else
/* Macro forms of the sqrt_round intrinsics, used when __OPTIMIZE__ is not
   defined (presumably so the rounding operand reaches the builtin as a
   literal constant -- confirm against the builtin's requirements).
   Each argument is parenthesized and cast the same way the inline
   versions cast their parameters, and every expansion is wrapped in
   parentheses so it behaves as a single primary expression.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (C)))

/* Scalar forms: B is passed before A, as in the inline versions.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), \
      (__mmask8) -1, (C)))

#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(W), (__mmask8)(U), (C)))

#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), \
      (__mmask8)(U), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) -1, (C)))

#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (C)))

#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8)(U), (C)))
756c5857
AI
2167#endif
2168
93103603
SP
/* Non-rounding scalar sqrt forms: each forwards to the matching _round_
   intrinsic with _MM_FROUND_CUR_DIRECTION as the rounding operand.  The
   expansion is wrapped in parentheses so the result is a single primary
   expression even when the _round_ form is itself a cast-prefixed macro.  */
#define _mm_mask_sqrt_sd(W, U, A, B) \
  (_mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_sqrt_sd(U, A, B) \
  (_mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_sqrt_ss(W, U, A, B) \
  (_mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_sqrt_ss(U, A, B) \
  (_mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
2180
756c5857
AI
2181extern __inline __m512i
2182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183_mm512_cvtepi8_epi32 (__m128i __A)
2184{
2185 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2186 (__v16si)
4271e5cb 2187 _mm512_undefined_epi32 (),
756c5857
AI
2188 (__mmask16) -1);
2189}
2190
2191extern __inline __m512i
2192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2193_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2194{
2195 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2196 (__v16si) __W,
2197 (__mmask16) __U);
2198}
2199
2200extern __inline __m512i
2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2203{
2204 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2205 (__v16si)
2206 _mm512_setzero_si512 (),
2207 (__mmask16) __U);
2208}
2209
2210extern __inline __m512i
2211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212_mm512_cvtepi8_epi64 (__m128i __A)
2213{
2214 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2215 (__v8di)
4271e5cb 2216 _mm512_undefined_epi32 (),
756c5857
AI
2217 (__mmask8) -1);
2218}
2219
2220extern __inline __m512i
2221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2222_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2223{
2224 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2225 (__v8di) __W,
2226 (__mmask8) __U);
2227}
2228
2229extern __inline __m512i
2230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2232{
2233 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2234 (__v8di)
2235 _mm512_setzero_si512 (),
2236 (__mmask8) __U);
2237}
2238
2239extern __inline __m512i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm512_cvtepi16_epi32 (__m256i __A)
2242{
2243 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2244 (__v16si)
4271e5cb 2245 _mm512_undefined_epi32 (),
756c5857
AI
2246 (__mmask16) -1);
2247}
2248
2249extern __inline __m512i
2250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2252{
2253 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2254 (__v16si) __W,
2255 (__mmask16) __U);
2256}
2257
2258extern __inline __m512i
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2261{
2262 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2263 (__v16si)
2264 _mm512_setzero_si512 (),
2265 (__mmask16) __U);
2266}
2267
2268extern __inline __m512i
2269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270_mm512_cvtepi16_epi64 (__m128i __A)
2271{
2272 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2273 (__v8di)
4271e5cb 2274 _mm512_undefined_epi32 (),
756c5857
AI
2275 (__mmask8) -1);
2276}
2277
2278extern __inline __m512i
2279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2280_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2281{
2282 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2283 (__v8di) __W,
2284 (__mmask8) __U);
2285}
2286
2287extern __inline __m512i
2288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2289_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2290{
2291 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2292 (__v8di)
2293 _mm512_setzero_si512 (),
2294 (__mmask8) __U);
2295}
2296
2297extern __inline __m512i
2298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2299_mm512_cvtepi32_epi64 (__m256i __X)
2300{
2301 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2302 (__v8di)
4271e5cb 2303 _mm512_undefined_epi32 (),
756c5857
AI
2304 (__mmask8) -1);
2305}
2306
2307extern __inline __m512i
2308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2309_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2310{
2311 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2312 (__v8di) __W,
2313 (__mmask8) __U);
2314}
2315
2316extern __inline __m512i
2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2319{
2320 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2321 (__v8di)
2322 _mm512_setzero_si512 (),
2323 (__mmask8) __U);
2324}
2325
2326extern __inline __m512i
2327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2328_mm512_cvtepu8_epi32 (__m128i __A)
2329{
2330 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2331 (__v16si)
4271e5cb 2332 _mm512_undefined_epi32 (),
756c5857
AI
2333 (__mmask16) -1);
2334}
2335
2336extern __inline __m512i
2337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2338_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2339{
2340 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2341 (__v16si) __W,
2342 (__mmask16) __U);
2343}
2344
2345extern __inline __m512i
2346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2348{
2349 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2350 (__v16si)
2351 _mm512_setzero_si512 (),
2352 (__mmask16) __U);
2353}
2354
2355extern __inline __m512i
2356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357_mm512_cvtepu8_epi64 (__m128i __A)
2358{
2359 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2360 (__v8di)
4271e5cb 2361 _mm512_undefined_epi32 (),
756c5857
AI
2362 (__mmask8) -1);
2363}
2364
2365extern __inline __m512i
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2368{
2369 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2370 (__v8di) __W,
2371 (__mmask8) __U);
2372}
2373
2374extern __inline __m512i
2375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2377{
2378 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2379 (__v8di)
2380 _mm512_setzero_si512 (),
2381 (__mmask8) __U);
2382}
2383
2384extern __inline __m512i
2385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2386_mm512_cvtepu16_epi32 (__m256i __A)
2387{
2388 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2389 (__v16si)
4271e5cb 2390 _mm512_undefined_epi32 (),
756c5857
AI
2391 (__mmask16) -1);
2392}
2393
2394extern __inline __m512i
2395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2396_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2397{
2398 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2399 (__v16si) __W,
2400 (__mmask16) __U);
2401}
2402
2403extern __inline __m512i
2404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2406{
2407 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2408 (__v16si)
2409 _mm512_setzero_si512 (),
2410 (__mmask16) __U);
2411}
2412
2413extern __inline __m512i
2414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2415_mm512_cvtepu16_epi64 (__m128i __A)
2416{
2417 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2418 (__v8di)
4271e5cb 2419 _mm512_undefined_epi32 (),
756c5857
AI
2420 (__mmask8) -1);
2421}
2422
2423extern __inline __m512i
2424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2426{
2427 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2428 (__v8di) __W,
2429 (__mmask8) __U);
2430}
2431
2432extern __inline __m512i
2433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2435{
2436 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2437 (__v8di)
2438 _mm512_setzero_si512 (),
2439 (__mmask8) __U);
2440}
2441
2442extern __inline __m512i
2443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2444_mm512_cvtepu32_epi64 (__m256i __X)
2445{
2446 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2447 (__v8di)
4271e5cb 2448 _mm512_undefined_epi32 (),
756c5857
AI
2449 (__mmask8) -1);
2450}
2451
2452extern __inline __m512i
2453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2454_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2455{
2456 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2457 (__v8di) __W,
2458 (__mmask8) __U);
2459}
2460
2461extern __inline __m512i
2462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2464{
2465 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2466 (__v8di)
2467 _mm512_setzero_si512 (),
2468 (__mmask8) __U);
2469}
2470
2471#ifdef __OPTIMIZE__
2472extern __inline __m512d
2473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2475{
2476 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2477 (__v8df) __B,
2478 (__v8df)
0b192937 2479 _mm512_undefined_pd (),
756c5857
AI
2480 (__mmask8) -1, __R);
2481}
2482
2483extern __inline __m512d
2484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2485_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2486 __m512d __B, const int __R)
2487{
2488 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2489 (__v8df) __B,
2490 (__v8df) __W,
2491 (__mmask8) __U, __R);
2492}
2493
2494extern __inline __m512d
2495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2497 const int __R)
2498{
2499 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2500 (__v8df) __B,
2501 (__v8df)
2502 _mm512_setzero_pd (),
2503 (__mmask8) __U, __R);
2504}
2505
2506extern __inline __m512
2507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2509{
2510 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2511 (__v16sf) __B,
2512 (__v16sf)
0b192937 2513 _mm512_undefined_ps (),
756c5857
AI
2514 (__mmask16) -1, __R);
2515}
2516
2517extern __inline __m512
2518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2519_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2520 __m512 __B, const int __R)
2521{
2522 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2523 (__v16sf) __B,
2524 (__v16sf) __W,
2525 (__mmask16) __U, __R);
2526}
2527
2528extern __inline __m512
2529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2531{
2532 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2533 (__v16sf) __B,
2534 (__v16sf)
2535 _mm512_setzero_ps (),
2536 (__mmask16) __U, __R);
2537}
2538
2539extern __inline __m512d
2540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2541_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2542{
2543 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2544 (__v8df) __B,
2545 (__v8df)
0b192937 2546 _mm512_undefined_pd (),
756c5857
AI
2547 (__mmask8) -1, __R);
2548}
2549
2550extern __inline __m512d
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2553 __m512d __B, const int __R)
2554{
2555 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2556 (__v8df) __B,
2557 (__v8df) __W,
2558 (__mmask8) __U, __R);
2559}
2560
2561extern __inline __m512d
2562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2563_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2564 const int __R)
2565{
2566 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2567 (__v8df) __B,
2568 (__v8df)
2569 _mm512_setzero_pd (),
2570 (__mmask8) __U, __R);
2571}
2572
2573extern __inline __m512
2574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2576{
2577 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2578 (__v16sf) __B,
2579 (__v16sf)
0b192937 2580 _mm512_undefined_ps (),
756c5857
AI
2581 (__mmask16) -1, __R);
2582}
2583
2584extern __inline __m512
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2587 __m512 __B, const int __R)
2588{
2589 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2590 (__v16sf) __B,
2591 (__v16sf) __W,
2592 (__mmask16) __U, __R);
2593}
2594
2595extern __inline __m512
2596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2598{
2599 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2600 (__v16sf) __B,
2601 (__v16sf)
2602 _mm512_setzero_ps (),
2603 (__mmask16) __U, __R);
2604}
2605#else
/* Macro forms of the add/sub_round intrinsics, used when __OPTIMIZE__ is
   not defined.  Each argument is parenthesized and cast the same way the
   inline versions cast their parameters, and every expansion is wrapped
   in parentheses so it behaves as a single primary expression.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8)(U), (C)))

#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16)(U), (C)))

#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8)(U), (C)))

#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16)(U), (C)))
2641#endif
2642
2643#ifdef __OPTIMIZE__
2644extern __inline __m512d
2645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2646_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2647{
2648 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2649 (__v8df) __B,
2650 (__v8df)
0b192937 2651 _mm512_undefined_pd (),
756c5857
AI
2652 (__mmask8) -1, __R);
2653}
2654
2655extern __inline __m512d
2656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2657_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2658 __m512d __B, const int __R)
2659{
2660 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2661 (__v8df) __B,
2662 (__v8df) __W,
2663 (__mmask8) __U, __R);
2664}
2665
2666extern __inline __m512d
2667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2668_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2669 const int __R)
2670{
2671 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2672 (__v8df) __B,
2673 (__v8df)
2674 _mm512_setzero_pd (),
2675 (__mmask8) __U, __R);
2676}
2677
2678extern __inline __m512
2679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2681{
2682 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2683 (__v16sf) __B,
2684 (__v16sf)
0b192937 2685 _mm512_undefined_ps (),
756c5857
AI
2686 (__mmask16) -1, __R);
2687}
2688
2689extern __inline __m512
2690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2692 __m512 __B, const int __R)
2693{
2694 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2695 (__v16sf) __B,
2696 (__v16sf) __W,
2697 (__mmask16) __U, __R);
2698}
2699
2700extern __inline __m512
2701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2702_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2703{
2704 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2705 (__v16sf) __B,
2706 (__v16sf)
2707 _mm512_setzero_ps (),
2708 (__mmask16) __U, __R);
2709}
2710
2711extern __inline __m512d
2712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2713_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2714{
2715 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2716 (__v8df) __V,
2717 (__v8df)
0b192937 2718 _mm512_undefined_pd (),
756c5857
AI
2719 (__mmask8) -1, __R);
2720}
2721
2722extern __inline __m512d
2723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2724_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2725 __m512d __V, const int __R)
2726{
2727 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2728 (__v8df) __V,
2729 (__v8df) __W,
2730 (__mmask8) __U, __R);
2731}
2732
2733extern __inline __m512d
2734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2735_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2736 const int __R)
2737{
2738 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2739 (__v8df) __V,
2740 (__v8df)
2741 _mm512_setzero_pd (),
2742 (__mmask8) __U, __R);
2743}
2744
2745extern __inline __m512
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2748{
2749 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2750 (__v16sf) __B,
2751 (__v16sf)
0b192937 2752 _mm512_undefined_ps (),
756c5857
AI
2753 (__mmask16) -1, __R);
2754}
2755
2756extern __inline __m512
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2759 __m512 __B, const int __R)
2760{
2761 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2762 (__v16sf) __B,
2763 (__v16sf) __W,
2764 (__mmask16) __U, __R);
2765}
2766
2767extern __inline __m512
2768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2770{
2771 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2772 (__v16sf) __B,
2773 (__v16sf)
2774 _mm512_setzero_ps (),
2775 (__mmask16) __U, __R);
2776}
2777
075691af
AI
2778extern __inline __m128d
2779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2780_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2781{
2782 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2783 (__v2df) __B,
2784 __R);
2785}
2786
f4ee3a9e
UB
2787extern __inline __m128d
2788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2789_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2790 __m128d __B, const int __R)
2791{
2792 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2793 (__v2df) __B,
2794 (__v2df) __W,
2795 (__mmask8) __U, __R);
2796}
2797
2798extern __inline __m128d
2799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2801 const int __R)
2802{
2803 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2804 (__v2df) __B,
2805 (__v2df)
2806 _mm_setzero_pd (),
2807 (__mmask8) __U, __R);
2808}
2809
075691af
AI
2810extern __inline __m128
2811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2812_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2813{
2814 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2815 (__v4sf) __B,
2816 __R);
2817}
2818
f4ee3a9e
UB
2819extern __inline __m128
2820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2821_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2822 __m128 __B, const int __R)
2823{
2824 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2825 (__v4sf) __B,
2826 (__v4sf) __W,
2827 (__mmask8) __U, __R);
2828}
2829
2830extern __inline __m128
2831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2832_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2833 const int __R)
2834{
2835 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2836 (__v4sf) __B,
2837 (__v4sf)
2838 _mm_setzero_ps (),
2839 (__mmask8) __U, __R);
2840}
2841
075691af
AI
2842extern __inline __m128d
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2845{
2846 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2847 (__v2df) __B,
2848 __R);
2849}
2850
f4ee3a9e
UB
2851extern __inline __m128d
2852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2853_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2854 __m128d __B, const int __R)
2855{
2856 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2857 (__v2df) __B,
2858 (__v2df) __W,
2859 (__mmask8) __U, __R);
2860}
2861
2862extern __inline __m128d
2863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2865 const int __R)
2866{
2867 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2868 (__v2df) __B,
2869 (__v2df)
2870 _mm_setzero_pd (),
2871 (__mmask8) __U, __R);
2872}
2873
075691af
AI
2874extern __inline __m128
2875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2877{
2878 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2879 (__v4sf) __B,
2880 __R);
2881}
2882
f4ee3a9e
UB
2883extern __inline __m128
2884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2885_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2886 __m128 __B, const int __R)
2887{
2888 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2889 (__v4sf) __B,
2890 (__v4sf) __W,
2891 (__mmask8) __U, __R);
2892}
2893
/* maskz form: calls __builtin_ia32_divss_mask_round with the vector
   returned by _mm_setzero_ps () as the third operand and __U as the
   mask.  */
2894extern __inline __m128
2895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2896_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2897 const int __R)
2898{
2899 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2900 (__v4sf) __B,
2901 (__v4sf)
2902 _mm_setzero_ps (),
2903 (__mmask8) __U, __R);
2904}
2905
756c5857
AI
2906#else
/* Macro forms (the #else branch) wrapping __builtin_ia32_mulpd512_mask.
   The unmasked macro passes _mm512_undefined_pd () and -1 as third
   operand and mask; the maskz macro passes _mm512_setzero_pd ().
   Every expansion and every macro argument is parenthesized so that a
   use such as _mm512_mul_round_pd (a, b, r)[0] applies the subscript to
   the cast result rather than to the builtin call.  */
2907#define _mm512_mul_round_pd(A, B, C) \
0b192937 2908 ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))
756c5857
AI
2909
2910#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2911 ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (W), (U), (C)))
2912
2913#define _mm512_maskz_mul_round_pd(U, A, B, C) \
2914 ((__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2915
/* Macro forms (the #else branch) wrapping __builtin_ia32_mulps512_mask.
   The unmasked macro passes _mm512_undefined_ps () and -1 as third
   operand and mask; the maskz macro passes _mm512_setzero_ps ().
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2916#define _mm512_mul_round_ps(A, B, C) \
0b192937 2917 ((__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))
756c5857
AI
2918
2919#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2920 ((__m512)__builtin_ia32_mulps512_mask((A), (B), (W), (U), (C)))
2921
2922#define _mm512_maskz_mul_round_ps(U, A, B, C) \
2923 ((__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
2924
/* Macro forms (the #else branch) wrapping __builtin_ia32_divpd512_mask.
   The unmasked macro passes _mm512_undefined_pd () and -1 as third
   operand and mask; the maskz macro passes _mm512_setzero_pd ().
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2925#define _mm512_div_round_pd(A, B, C) \
0b192937 2926 ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))
756c5857
AI
2927
2928#define _mm512_mask_div_round_pd(W, U, A, B, C) \
2929 ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (W), (U), (C)))
2930
2931#define _mm512_maskz_div_round_pd(U, A, B, C) \
2932 ((__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
2933
/* Macro forms (the #else branch) wrapping __builtin_ia32_divps512_mask.
   The unmasked macro passes _mm512_undefined_ps () and -1 as third
   operand and mask; the maskz macro passes _mm512_setzero_ps ().
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2934#define _mm512_div_round_ps(A, B, C) \
0b192937 2935 ((__m512)__builtin_ia32_divps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))
756c5857
AI
2936
2937#define _mm512_mask_div_round_ps(W, U, A, B, C) \
2938 ((__m512)__builtin_ia32_divps512_mask((A), (B), (W), (U), (C)))
2939
2940#define _mm512_maskz_div_round_ps(U, A, B, C) \
2941 ((__m512)__builtin_ia32_divps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
075691af
AI
2942
/* Macro forms (the #else branch) of the scalar double multiply wrappers:
   the plain macro uses __builtin_ia32_mulsd_round, the mask/maskz macros
   use __builtin_ia32_mulsd_mask_round (maskz passes _mm_setzero_pd ()).
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2943#define _mm_mul_round_sd(A, B, C) \
2944 ((__m128d)__builtin_ia32_mulsd_round((A), (B), (C)))
2945
f4ee3a9e
UB
2946#define _mm_mask_mul_round_sd(W, U, A, B, C) \
2947 ((__m128d)__builtin_ia32_mulsd_mask_round((A), (B), (W), (U), (C)))
2948
2949#define _mm_maskz_mul_round_sd(U, A, B, C) \
2950 ((__m128d)__builtin_ia32_mulsd_mask_round((A), (B), (__v2df)_mm_setzero_pd(), (U), (C)))
2951
075691af
AI
/* Macro forms (the #else branch) of the scalar float multiply wrappers:
   the plain macro uses __builtin_ia32_mulss_round, the mask/maskz macros
   use __builtin_ia32_mulss_mask_round (maskz passes _mm_setzero_ps ()).
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2952#define _mm_mul_round_ss(A, B, C) \
2953 ((__m128)__builtin_ia32_mulss_round((A), (B), (C)))
2954
f4ee3a9e
UB
2955#define _mm_mask_mul_round_ss(W, U, A, B, C) \
2956 ((__m128)__builtin_ia32_mulss_mask_round((A), (B), (W), (U), (C)))
2957
2958#define _mm_maskz_mul_round_ss(U, A, B, C) \
2959 ((__m128)__builtin_ia32_mulss_mask_round((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C)))
2960
075691af
AI
/* Macro forms (the #else branch) of the scalar double divide wrappers:
   the plain macro uses __builtin_ia32_divsd_round, the mask/maskz macros
   use __builtin_ia32_divsd_mask_round (maskz passes _mm_setzero_pd ()).
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2961#define _mm_div_round_sd(A, B, C) \
2962 ((__m128d)__builtin_ia32_divsd_round((A), (B), (C)))
2963
f4ee3a9e
UB
2964#define _mm_mask_div_round_sd(W, U, A, B, C) \
2965 ((__m128d)__builtin_ia32_divsd_mask_round((A), (B), (W), (U), (C)))
2966
2967#define _mm_maskz_div_round_sd(U, A, B, C) \
2968 ((__m128d)__builtin_ia32_divsd_mask_round((A), (B), (__v2df)_mm_setzero_pd(), (U), (C)))
2969
075691af
AI
/* Macro forms (the #else branch) of the scalar float divide wrappers:
   the plain macro uses __builtin_ia32_divss_round, the mask/maskz macros
   use __builtin_ia32_divss_mask_round (maskz passes _mm_setzero_ps ()).
   Expansions and arguments are fully parenthesized so the macros can be
   used inside larger expressions like a function call.  */
2970#define _mm_div_round_ss(A, B, C) \
2971 ((__m128)__builtin_ia32_divss_round((A), (B), (C)))
f4ee3a9e
UB
2972
2973#define _mm_mask_div_round_ss(W, U, A, B, C) \
2974 ((__m128)__builtin_ia32_divss_mask_round((A), (B), (W), (U), (C)))
2975
2976#define _mm_maskz_div_round_ss(U, A, B, C) \
2977 ((__m128)__builtin_ia32_divss_mask_round((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C)))
2978
756c5857
AI
2979#endif
2980
2981#ifdef __OPTIMIZE__
/* Unmasked form: calls __builtin_ia32_maxpd512_mask on __A and __B with
   the vector from _mm512_undefined_pd () as third operand and
   (__mmask8) -1 as mask; __R is forwarded unchanged.  */
2982extern __inline __m512d
2983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2984_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2985{
2986 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df)
0b192937 2989 _mm512_undefined_pd (),
756c5857
AI
2990 (__mmask8) -1, __R);
2991}
2992
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_maxpd512_mask; __R is forwarded unchanged.  */
2993extern __inline __m512d
2994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2996 __m512d __B, const int __R)
2997{
2998 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2999 (__v8df) __B,
3000 (__v8df) __W,
3001 (__mmask8) __U, __R);
3002}
3003
/* maskz form: passes the vector from _mm512_setzero_pd () as the third
   operand and __U as the mask to __builtin_ia32_maxpd512_mask.  */
3004extern __inline __m512d
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3007 const int __R)
3008{
3009 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
3010 (__v8df) __B,
3011 (__v8df)
3012 _mm512_setzero_pd (),
3013 (__mmask8) __U, __R);
3014}
3015
/* Unmasked form: calls __builtin_ia32_maxps512_mask on __A and __B with
   the vector from _mm512_undefined_ps () as third operand and
   (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3016extern __inline __m512
3017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3018_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
3019{
3020 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf)
0b192937 3023 _mm512_undefined_ps (),
756c5857
AI
3024 (__mmask16) -1, __R);
3025}
3026
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_maxps512_mask; __R is forwarded unchanged.  */
3027extern __inline __m512
3028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3029_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3030 __m512 __B, const int __R)
3031{
3032 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3033 (__v16sf) __B,
3034 (__v16sf) __W,
3035 (__mmask16) __U, __R);
3036}
3037
/* maskz form: passes the vector from _mm512_setzero_ps () as the third
   operand and __U as the mask to __builtin_ia32_maxps512_mask.  */
3038extern __inline __m512
3039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3041{
3042 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3043 (__v16sf) __B,
3044 (__v16sf)
3045 _mm512_setzero_ps (),
3046 (__mmask16) __U, __R);
3047}
3048
/* Unmasked form: calls __builtin_ia32_minpd512_mask on __A and __B with
   the vector from _mm512_undefined_pd () as third operand and
   (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3049extern __inline __m512d
3050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3051_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
3052{
3053 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3054 (__v8df) __B,
3055 (__v8df)
0b192937 3056 _mm512_undefined_pd (),
756c5857
AI
3057 (__mmask8) -1, __R);
3058}
3059
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_minpd512_mask; __R is forwarded unchanged.  */
3060extern __inline __m512d
3061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3063 __m512d __B, const int __R)
3064{
3065 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3066 (__v8df) __B,
3067 (__v8df) __W,
3068 (__mmask8) __U, __R);
3069}
3070
/* maskz form: passes the vector from _mm512_setzero_pd () as the third
   operand and __U as the mask to __builtin_ia32_minpd512_mask.  */
3071extern __inline __m512d
3072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3074 const int __R)
3075{
3076 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3077 (__v8df) __B,
3078 (__v8df)
3079 _mm512_setzero_pd (),
3080 (__mmask8) __U, __R);
3081}
3082
/* Unmasked form: calls __builtin_ia32_minps512_mask on __A and __B with
   the vector from _mm512_undefined_ps () as third operand and
   (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3083extern __inline __m512
3084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3085_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
3086{
3087 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3088 (__v16sf) __B,
3089 (__v16sf)
0b192937 3090 _mm512_undefined_ps (),
756c5857
AI
3091 (__mmask16) -1, __R);
3092}
3093
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_minps512_mask; __R is forwarded unchanged.  */
3094extern __inline __m512
3095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3096_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3097 __m512 __B, const int __R)
3098{
3099 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3100 (__v16sf) __B,
3101 (__v16sf) __W,
3102 (__mmask16) __U, __R);
3103}
3104
/* maskz form: passes the vector from _mm512_setzero_ps () as the third
   operand and __U as the mask to __builtin_ia32_minps512_mask.  */
3105extern __inline __m512
3106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3108{
3109 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3110 (__v16sf) __B,
3111 (__v16sf)
3112 _mm512_setzero_ps (),
3113 (__mmask16) __U, __R);
3114}
3115#else
/* Macro forms (the !__OPTIMIZE__ branch) wrapping
   __builtin_ia32_maxpd512_mask.  The unmasked macro passes
   _mm512_undefined_pd () and -1 as third operand and mask; the maskz
   macro passes _mm512_setzero_pd ().  Expansions and arguments are fully
   parenthesized so the macros can be used inside larger expressions
   like a function call.  */
3116#define _mm512_max_round_pd(A, B, R) \
0b192937 3117 ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (R)))
756c5857
AI
3118
3119#define _mm512_mask_max_round_pd(W, U, A, B, R) \
3120 ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), (W), (U), (R)))
3121
3122#define _mm512_maskz_max_round_pd(U, A, B, R) \
3123 ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (R)))
3124
/* Macro form (the !__OPTIMIZE__ branch) of the unmasked float max wrapper.
   The placeholder third operand now comes from _mm512_undefined_ps (),
   matching the inline definition of this intrinsic and the other *_ps
   macros (it previously borrowed the double-precision
   _mm512_undefined_pd ()).  The expansion and arguments are parenthesized
   so the macro can be used inside larger expressions.  */
3125#define _mm512_max_round_ps(A, B, R) \
0b192937 3126 ((__m512)__builtin_ia32_maxps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (R)))
756c5857
AI
3127
/* Macro forms (the !__OPTIMIZE__ branch) of the mask/maskz float max
   wrappers around __builtin_ia32_maxps512_mask; maskz passes
   _mm512_setzero_ps () as third operand.  Expansions and arguments are
   fully parenthesized so the macros can be used inside larger
   expressions like a function call.  */
3128#define _mm512_mask_max_round_ps(W, U, A, B, R) \
3129 ((__m512)__builtin_ia32_maxps512_mask((A), (B), (W), (U), (R)))
3130
3131#define _mm512_maskz_max_round_ps(U, A, B, R) \
3132 ((__m512)__builtin_ia32_maxps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (R)))
3133
/* Macro forms (the !__OPTIMIZE__ branch) wrapping
   __builtin_ia32_minpd512_mask.  The unmasked macro passes
   _mm512_undefined_pd () and -1 as third operand and mask; the maskz
   macro passes _mm512_setzero_pd ().  Expansions and arguments are fully
   parenthesized so the macros can be used inside larger expressions
   like a function call.  */
3134#define _mm512_min_round_pd(A, B, R) \
0b192937 3135 ((__m512d)__builtin_ia32_minpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (R)))
756c5857
AI
3136
3137#define _mm512_mask_min_round_pd(W, U, A, B, R) \
3138 ((__m512d)__builtin_ia32_minpd512_mask((A), (B), (W), (U), (R)))
3139
3140#define _mm512_maskz_min_round_pd(U, A, B, R) \
3141 ((__m512d)__builtin_ia32_minpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (R)))
3142
/* Macro forms (the !__OPTIMIZE__ branch) wrapping
   __builtin_ia32_minps512_mask.  The unmasked macro passes
   _mm512_undefined_ps () and -1 as third operand and mask; the maskz
   macro passes _mm512_setzero_ps ().  Expansions and arguments are fully
   parenthesized so the macros can be used inside larger expressions
   like a function call.  */
3143#define _mm512_min_round_ps(A, B, R) \
0b192937 3144 ((__m512)__builtin_ia32_minps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (R)))
756c5857
AI
3145
3146#define _mm512_mask_min_round_ps(W, U, A, B, R) \
3147 ((__m512)__builtin_ia32_minps512_mask((A), (B), (W), (U), (R)))
3148
3149#define _mm512_maskz_min_round_ps(U, A, B, R) \
3150 ((__m512)__builtin_ia32_minps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (R)))
3151#endif
3152
3153#ifdef __OPTIMIZE__
/* Unmasked form: calls __builtin_ia32_scalefpd512_mask on __A and __B
   with the vector from _mm512_undefined_pd () as third operand and
   (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3154extern __inline __m512d
3155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3156_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3157{
3158 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3159 (__v8df) __B,
3160 (__v8df)
0b192937 3161 _mm512_undefined_pd (),
756c5857
AI
3162 (__mmask8) -1, __R);
3163}
3164
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_scalefpd512_mask; __R is forwarded unchanged.  */
3165extern __inline __m512d
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3168 __m512d __B, const int __R)
3169{
3170 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3171 (__v8df) __B,
3172 (__v8df) __W,
3173 (__mmask8) __U, __R);
3174}
3175
/* maskz form: passes the vector from _mm512_setzero_pd () as the third
   operand and __U as the mask to __builtin_ia32_scalefpd512_mask.  */
3176extern __inline __m512d
3177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3179 const int __R)
3180{
3181 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3182 (__v8df) __B,
3183 (__v8df)
3184 _mm512_setzero_pd (),
3185 (__mmask8) __U, __R);
3186}
3187
/* Unmasked form: calls __builtin_ia32_scalefps512_mask on __A and __B
   with the vector from _mm512_undefined_ps () as third operand and
   (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3188extern __inline __m512
3189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3190_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3191{
3192 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3193 (__v16sf) __B,
3194 (__v16sf)
0b192937 3195 _mm512_undefined_ps (),
756c5857
AI
3196 (__mmask16) -1, __R);
3197}
3198
/* Masked form: passes __W as the third vector operand and __U as the
   mask to __builtin_ia32_scalefps512_mask; __R is forwarded unchanged.  */
3199extern __inline __m512
3200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3201_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3202 __m512 __B, const int __R)
3203{
3204 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3205 (__v16sf) __B,
3206 (__v16sf) __W,
3207 (__mmask16) __U, __R);
3208}
3209
/* maskz form: passes the vector from _mm512_setzero_ps () as the third
   operand and __U as the mask to __builtin_ia32_scalefps512_mask.  */
3210extern __inline __m512
3211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3212_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3213 const int __R)
3214{
3215 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3216 (__v16sf) __B,
3217 (__v16sf)
3218 _mm512_setzero_ps (),
3219 (__mmask16) __U, __R);
3220}
3221
075691af
AI
/* Unmasked scalar form: implemented through the masked builtin
   __builtin_ia32_scalefsd_mask_round, supplying _mm_setzero_pd () as the
   third operand and (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3222extern __inline __m128d
3223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3224_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3225{
158061a6
OM
3226 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3227 (__v2df) __B,
3228 (__v2df)
3229 _mm_setzero_pd (),
3230 (__mmask8) -1, __R);
3231}
3232
/* Masked scalar form: passes __W as the third vector operand and __U as
   the mask to __builtin_ia32_scalefsd_mask_round; __R is forwarded
   unchanged.  */
3233extern __inline __m128d
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3236 const int __R)
3237{
3238 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3239 (__v2df) __B,
3240 (__v2df) __W,
3241 (__mmask8) __U, __R);
3242}
3243
/* maskz scalar form: passes the vector from _mm_setzero_pd () as the
   third operand and __U as the mask to
   __builtin_ia32_scalefsd_mask_round.  */
3244extern __inline __m128d
3245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3247 const int __R)
3248{
3249 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3250 (__v2df) __B,
3251 (__v2df)
3252 _mm_setzero_pd (),
3253 (__mmask8) __U, __R);
075691af
AI
3254}
3255
/* Unmasked scalar form: implemented through the masked builtin
   __builtin_ia32_scalefss_mask_round, supplying _mm_setzero_ps () as the
   third operand and (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3256extern __inline __m128
3257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3258_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3259{
158061a6
OM
3260 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3261 (__v4sf) __B,
3262 (__v4sf)
3263 _mm_setzero_ps (),
3264 (__mmask8) -1, __R);
3265}
3266
/* Masked scalar form: passes __W as the third vector operand and __U as
   the mask to __builtin_ia32_scalefss_mask_round; __R is forwarded
   unchanged.  */
3267extern __inline __m128
3268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3269_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3270 const int __R)
3271{
3272 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3273 (__v4sf) __B,
3274 (__v4sf) __W,
3275 (__mmask8) __U, __R);
3276}
3277
/* maskz scalar form: passes the vector from _mm_setzero_ps () as the
   third operand and __U as the mask to
   __builtin_ia32_scalefss_mask_round.  */
3278extern __inline __m128
3279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3281{
3282 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3283 (__v4sf) __B,
3284 (__v4sf)
3285 _mm_setzero_ps (),
3286 (__mmask8) __U, __R);
075691af 3287}
756c5857
AI
3288#else
/* Macro forms (the !__OPTIMIZE__ branch) wrapping
   __builtin_ia32_scalefpd512_mask.  The unmasked macro passes
   _mm512_undefined_pd () and -1 as third operand and mask; the maskz
   macro passes _mm512_setzero_pd ().  Expansions and arguments are fully
   parenthesized so the macros can be used inside larger expressions
   like a function call.  */
3289#define _mm512_scalef_round_pd(A, B, C) \
0b192937 3290 ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (__v8df)_mm512_undefined_pd(), -1, (C)))
756c5857
AI
3291
3292#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
3293 ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
3294
3295#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
3296 ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C)))
3297
/* Macro forms (the !__OPTIMIZE__ branch) wrapping
   __builtin_ia32_scalefps512_mask.  The unmasked macro passes
   _mm512_undefined_ps () and -1 as third operand and mask; the maskz
   macro passes _mm512_setzero_ps ().  Expansions and arguments are fully
   parenthesized so the macros can be used inside larger expressions
   like a function call.  */
3298#define _mm512_scalef_round_ps(A, B, C) \
0b192937 3299 ((__m512)__builtin_ia32_scalefps512_mask((A), (B), (__v16sf)_mm512_undefined_ps(), -1, (C)))
756c5857
AI
3300
3301#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
3302 ((__m512)__builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
3303
3304#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
3305 ((__m512)__builtin_ia32_scalefps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C)))
075691af
AI
3306
/* Macro forms (the !__OPTIMIZE__ branch) of the unmasked scalar scalef
   wrappers; both go through the *_mask_round builtins with a zero
   vector as third operand and -1 as mask.  Expansions and arguments are
   fully parenthesized so the macros can be used inside larger
   expressions like a function call.
   NOTE(review): no macro counterparts of _mm_mask_scalef_round_sd/ss or
   _mm_maskz_scalef_round_sd/ss appear in this branch, although the
   __OPTIMIZE__ branch defines them as functions -- confirm they are
   provided elsewhere in the header.  */
3307#define _mm_scalef_round_sd(A, B, C) \
158061a6
OM
3308 ((__m128d)__builtin_ia32_scalefsd_mask_round ((A), (B), \
3309 (__v2df)_mm_setzero_pd (), -1, (C)))
075691af
AI
3310
3311#define _mm_scalef_round_ss(A, B, C) \
158061a6
OM
3312 ((__m128)__builtin_ia32_scalefss_mask_round ((A), (B), \
3313 (__v4sf)_mm_setzero_ps (), -1, (C)))
756c5857
AI
3314#endif
3315
93103603
SP
/* Convenience macros without an explicit rounding argument: each expands
   to the corresponding *_scalef_round_sd/ss form with
   _MM_FROUND_CUR_DIRECTION as the last argument.  They are defined
   outside the __OPTIMIZE__ conditional, so they exist in both modes.
   NOTE(review): the expansions rely on _mm_mask_scalef_round_sd/ss and
   _mm_maskz_scalef_round_sd/ss being available when __OPTIMIZE__ is not
   defined -- confirm, since only function definitions are visible
   nearby.  */
3316#define _mm_mask_scalef_sd(W, U, A, B) \
3317 _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
3318
3319#define _mm_maskz_scalef_sd(U, A, B) \
3320 _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
3321
3322#define _mm_mask_scalef_ss(W, U, A, B) \
3323 _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
3324
3325#define _mm_maskz_scalef_ss(U, A, B) \
3326 _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
3327
756c5857
AI
3328#ifdef __OPTIMIZE__
/* Unmasked form: calls __builtin_ia32_vfmaddpd512_mask on __A, __B, __C
   with (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3329extern __inline __m512d
3330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3332{
3333 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3334 (__v8df) __B,
3335 (__v8df) __C,
3336 (__mmask8) -1, __R);
3337}
3338
/* Masked form: calls __builtin_ia32_vfmaddpd512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3339extern __inline __m512d
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3342 __m512d __C, const int __R)
3343{
3344 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 (__v8df) __C,
3347 (__mmask8) __U, __R);
3348}
3349
/* mask3 form: calls __builtin_ia32_vfmaddpd512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3350extern __inline __m512d
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3353 __mmask8 __U, const int __R)
3354{
3355 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3356 (__v8df) __B,
3357 (__v8df) __C,
3358 (__mmask8) __U, __R);
3359}
3360
/* maskz form: calls __builtin_ia32_vfmaddpd512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3361extern __inline __m512d
3362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3364 __m512d __C, const int __R)
3365{
3366 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3367 (__v8df) __B,
3368 (__v8df) __C,
3369 (__mmask8) __U, __R);
3370}
3371
/* Unmasked form: calls __builtin_ia32_vfmaddps512_mask on __A, __B, __C
   with (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3372extern __inline __m512
3373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3375{
3376 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3377 (__v16sf) __B,
3378 (__v16sf) __C,
3379 (__mmask16) -1, __R);
3380}
3381
/* Masked form: calls __builtin_ia32_vfmaddps512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3382extern __inline __m512
3383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3384_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3385 __m512 __C, const int __R)
3386{
3387 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3388 (__v16sf) __B,
3389 (__v16sf) __C,
3390 (__mmask16) __U, __R);
3391}
3392
/* mask3 form: calls __builtin_ia32_vfmaddps512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3393extern __inline __m512
3394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3396 __mmask16 __U, const int __R)
3397{
3398 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3399 (__v16sf) __B,
3400 (__v16sf) __C,
3401 (__mmask16) __U, __R);
3402}
3403
/* maskz form: calls __builtin_ia32_vfmaddps512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3404extern __inline __m512
3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3407 __m512 __C, const int __R)
3408{
3409 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3410 (__v16sf) __B,
3411 (__v16sf) __C,
3412 (__mmask16) __U, __R);
3413}
3414
/* Unmasked form: calls the dedicated __builtin_ia32_vfmsubpd512_mask on
   __A, __B, __C (no operand is negated here) with (__mmask8) -1 as
   mask; __R is forwarded unchanged.  */
3415extern __inline __m512d
3416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3418{
fe7f972d 3419 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3420 (__v8df) __B,
fe7f972d 3421 (__v8df) __C,
756c5857
AI
3422 (__mmask8) -1, __R);
3423}
3424
/* Masked form: calls __builtin_ia32_vfmsubpd512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3425extern __inline __m512d
3426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3428 __m512d __C, const int __R)
3429{
fe7f972d 3430 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3431 (__v8df) __B,
fe7f972d 3432 (__v8df) __C,
756c5857
AI
3433 (__mmask8) __U, __R);
3434}
3435
/* mask3 form: calls __builtin_ia32_vfmsubpd512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3436extern __inline __m512d
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3439 __mmask8 __U, const int __R)
3440{
3441 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3442 (__v8df) __B,
3443 (__v8df) __C,
3444 (__mmask8) __U, __R);
3445}
3446
/* maskz form: calls __builtin_ia32_vfmsubpd512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3447extern __inline __m512d
3448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3450 __m512d __C, const int __R)
3451{
fe7f972d 3452 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 3453 (__v8df) __B,
fe7f972d 3454 (__v8df) __C,
756c5857
AI
3455 (__mmask8) __U, __R);
3456}
3457
/* Unmasked form: calls the dedicated __builtin_ia32_vfmsubps512_mask on
   __A, __B, __C (no operand is negated here) with (__mmask16) -1 as
   mask; __R is forwarded unchanged.  */
3458extern __inline __m512
3459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3461{
fe7f972d 3462 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3463 (__v16sf) __B,
fe7f972d 3464 (__v16sf) __C,
756c5857
AI
3465 (__mmask16) -1, __R);
3466}
3467
/* Masked form: calls __builtin_ia32_vfmsubps512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3468extern __inline __m512
3469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3470_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3471 __m512 __C, const int __R)
3472{
fe7f972d 3473 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3474 (__v16sf) __B,
fe7f972d 3475 (__v16sf) __C,
756c5857
AI
3476 (__mmask16) __U, __R);
3477}
3478
/* mask3 form: calls __builtin_ia32_vfmsubps512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3479extern __inline __m512
3480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3482 __mmask16 __U, const int __R)
3483{
3484 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3485 (__v16sf) __B,
3486 (__v16sf) __C,
3487 (__mmask16) __U, __R);
3488}
3489
/* maskz form: calls __builtin_ia32_vfmsubps512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3490extern __inline __m512
3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3493 __m512 __C, const int __R)
3494{
fe7f972d 3495 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 3496 (__v16sf) __B,
fe7f972d 3497 (__v16sf) __C,
756c5857
AI
3498 (__mmask16) __U, __R);
3499}
3500
/* Unmasked form: calls __builtin_ia32_vfmaddsubpd512_mask on __A, __B,
   __C with (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3501extern __inline __m512d
3502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3503_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3504{
3505 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3506 (__v8df) __B,
3507 (__v8df) __C,
3508 (__mmask8) -1, __R);
3509}
3510
/* Masked form: calls __builtin_ia32_vfmaddsubpd512_mask with __U as
   mask.  Note the public parameter order (__A, __U, __B, __C) differs
   from the builtin's (A, B, C, U).  */
3511extern __inline __m512d
3512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3513_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3514 __m512d __C, const int __R)
3515{
3516 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3517 (__v8df) __B,
3518 (__v8df) __C,
3519 (__mmask8) __U, __R);
3520}
3521
/* mask3 form: calls __builtin_ia32_vfmaddsubpd512_mask3; here __U
   follows the three vector parameters in both the wrapper and the
   builtin.  */
3522extern __inline __m512d
3523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3524_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3525 __mmask8 __U, const int __R)
3526{
3527 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3528 (__v8df) __B,
3529 (__v8df) __C,
3530 (__mmask8) __U, __R);
3531}
3532
/* maskz form: calls __builtin_ia32_vfmaddsubpd512_maskz; __U is the
   first wrapper parameter but the fourth builtin argument.  */
3533extern __inline __m512d
3534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3536 __m512d __C, const int __R)
3537{
3538 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3539 (__v8df) __B,
3540 (__v8df) __C,
3541 (__mmask8) __U, __R);
3542}
3543
/* Unmasked form: calls __builtin_ia32_vfmaddsubps512_mask on __A, __B,
   __C with (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3544extern __inline __m512
3545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3547{
3548 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3549 (__v16sf) __B,
3550 (__v16sf) __C,
3551 (__mmask16) -1, __R);
3552}
3553
/* Masked form: calls __builtin_ia32_vfmaddsubps512_mask with __U as
   mask.  Note the public parameter order (__A, __U, __B, __C) differs
   from the builtin's (A, B, C, U).  */
3554extern __inline __m512
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3557 __m512 __C, const int __R)
3558{
3559 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3560 (__v16sf) __B,
3561 (__v16sf) __C,
3562 (__mmask16) __U, __R);
3563}
3564
/* mask3 form: calls __builtin_ia32_vfmaddsubps512_mask3; here __U
   follows the three vector parameters in both the wrapper and the
   builtin.  */
3565extern __inline __m512
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3568 __mmask16 __U, const int __R)
3569{
3570 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3571 (__v16sf) __B,
3572 (__v16sf) __C,
3573 (__mmask16) __U, __R);
3574}
3575
/* maskz form: calls __builtin_ia32_vfmaddsubps512_maskz; __U is the
   first wrapper parameter but the fourth builtin argument.  */
3576extern __inline __m512
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3579 __m512 __C, const int __R)
3580{
3581 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3582 (__v16sf) __B,
3583 (__v16sf) __C,
3584 (__mmask16) __U, __R);
3585}
3586
/* Unmasked form.  No fmsubadd builtin is called here: the wrapper reuses
   __builtin_ia32_vfmaddsubpd512_mask and passes the negation of __C,
   with (__mmask8) -1 as mask.  */
3587extern __inline __m512d
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3590{
3591 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3592 (__v8df) __B,
3593 -(__v8df) __C,
3594 (__mmask8) -1, __R);
3595}
3596
/* Masked form.  Reuses __builtin_ia32_vfmaddsubpd512_mask with the
   negation of __C and __U as mask, rather than a dedicated fmsubadd
   builtin.  */
3597extern __inline __m512d
3598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3599_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3600 __m512d __C, const int __R)
3601{
3602 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3603 (__v8df) __B,
3604 -(__v8df) __C,
3605 (__mmask8) __U, __R);
3606}
3607
/* mask3 form.  Unlike the other fmsubadd pd wrappers, this one calls the
   dedicated __builtin_ia32_vfmsubaddpd512_mask3 and passes __C
   un-negated.  */
3608extern __inline __m512d
3609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3610_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3611 __mmask8 __U, const int __R)
3612{
3613 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3614 (__v8df) __B,
3615 (__v8df) __C,
3616 (__mmask8) __U, __R);
3617}
3618
/* maskz form.  Reuses __builtin_ia32_vfmaddsubpd512_maskz with the
   negation of __C and __U as mask, rather than a dedicated fmsubadd
   builtin.  */
3619extern __inline __m512d
3620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3621_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3622 __m512d __C, const int __R)
3623{
3624 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3625 (__v8df) __B,
3626 -(__v8df) __C,
3627 (__mmask8) __U, __R);
3628}
3629
/* Unmasked form.  No fmsubadd builtin is called here: the wrapper reuses
   __builtin_ia32_vfmaddsubps512_mask and passes the negation of __C,
   with (__mmask16) -1 as mask.  */
3630extern __inline __m512
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3633{
3634 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3635 (__v16sf) __B,
3636 -(__v16sf) __C,
3637 (__mmask16) -1, __R);
3638}
3639
/* Masked form.  Reuses __builtin_ia32_vfmaddsubps512_mask with the
   negation of __C and __U as mask, rather than a dedicated fmsubadd
   builtin.  */
3640extern __inline __m512
3641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3643 __m512 __C, const int __R)
3644{
3645 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3646 (__v16sf) __B,
3647 -(__v16sf) __C,
3648 (__mmask16) __U, __R);
3649}
3650
/* mask3 form.  Unlike the other fmsubadd ps wrappers, this one calls the
   dedicated __builtin_ia32_vfmsubaddps512_mask3 and passes __C
   un-negated.  */
3651extern __inline __m512
3652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3653_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3654 __mmask16 __U, const int __R)
3655{
3656 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3657 (__v16sf) __B,
3658 (__v16sf) __C,
3659 (__mmask16) __U, __R);
3660}
3661
/* maskz form.  Reuses __builtin_ia32_vfmaddsubps512_maskz with the
   negation of __C and __U as mask, rather than a dedicated fmsubadd
   builtin.  */
3662extern __inline __m512
3663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3664_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3665 __m512 __C, const int __R)
3666{
3667 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3668 (__v16sf) __B,
3669 -(__v16sf) __C,
3670 (__mmask16) __U, __R);
3671}
3672
/* Unmasked form: calls the dedicated __builtin_ia32_vfnmaddpd512_mask on
   __A, __B, __C (no operand is negated in the wrapper) with
   (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3673extern __inline __m512d
3674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3675_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3676{
5ca94977
L
3677 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3678 (__v8df) __B,
3679 (__v8df) __C,
3680 (__mmask8) -1, __R);
756c5857
AI
3681}
3682
/* Masked form: calls __builtin_ia32_vfnmaddpd512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3683extern __inline __m512d
3684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3685_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3686 __m512d __C, const int __R)
3687{
3688 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3689 (__v8df) __B,
3690 (__v8df) __C,
3691 (__mmask8) __U, __R);
3692}
3693
/* mask3 form: calls __builtin_ia32_vfnmaddpd512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3694extern __inline __m512d
3695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3696_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3697 __mmask8 __U, const int __R)
3698{
5ca94977
L
3699 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
3700 (__v8df) __B,
3701 (__v8df) __C,
3702 (__mmask8) __U, __R);
756c5857
AI
3703}
3704
/* maskz form: calls __builtin_ia32_vfnmaddpd512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3705extern __inline __m512d
3706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3707_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3708 __m512d __C, const int __R)
3709{
5ca94977
L
3710 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
3711 (__v8df) __B,
3712 (__v8df) __C,
3713 (__mmask8) __U, __R);
756c5857
AI
3714}
3715
/* Unmasked form: calls the dedicated __builtin_ia32_vfnmaddps512_mask on
   __A, __B, __C (no operand is negated in the wrapper) with
   (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3716extern __inline __m512
3717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3719{
5ca94977
L
3720 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3721 (__v16sf) __B,
3722 (__v16sf) __C,
3723 (__mmask16) -1, __R);
756c5857
AI
3724}
3725
/* Masked form: calls __builtin_ia32_vfnmaddps512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3726extern __inline __m512
3727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3729 __m512 __C, const int __R)
3730{
3731 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3732 (__v16sf) __B,
3733 (__v16sf) __C,
3734 (__mmask16) __U, __R);
3735}
3736
/* mask3 form: calls __builtin_ia32_vfnmaddps512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3737extern __inline __m512
3738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3740 __mmask16 __U, const int __R)
3741{
5ca94977
L
3742 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
3743 (__v16sf) __B,
3744 (__v16sf) __C,
3745 (__mmask16) __U, __R);
756c5857
AI
3746}
3747
/* maskz form: calls __builtin_ia32_vfnmaddps512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3748extern __inline __m512
3749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3750_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3751 __m512 __C, const int __R)
3752{
5ca94977
L
3753 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
3754 (__v16sf) __B,
3755 (__v16sf) __C,
3756 (__mmask16) __U, __R);
756c5857
AI
3757}
3758
/* Unmasked form: calls the dedicated __builtin_ia32_vfnmsubpd512_mask on
   __A, __B, __C (no operand is negated in the wrapper) with
   (__mmask8) -1 as mask; __R is forwarded unchanged.  */
3759extern __inline __m512d
3760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3761_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3762{
38ef6fb1
L
3763 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3764 (__v8df) __B,
3765 (__v8df) __C,
3766 (__mmask8) -1, __R);
756c5857
AI
3767}
3768
/* Masked form: calls __builtin_ia32_vfnmsubpd512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3769extern __inline __m512d
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3772 __m512d __C, const int __R)
3773{
3774 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3775 (__v8df) __B,
3776 (__v8df) __C,
3777 (__mmask8) __U, __R);
3778}
3779
/* mask3 form: calls __builtin_ia32_vfnmsubpd512_mask3; here __U follows
   the three vector parameters in both the wrapper and the builtin.  */
3780extern __inline __m512d
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3783 __mmask8 __U, const int __R)
3784{
3785 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3786 (__v8df) __B,
3787 (__v8df) __C,
3788 (__mmask8) __U, __R);
3789}
3790
/* maskz form: calls __builtin_ia32_vfnmsubpd512_maskz; __U is the first
   wrapper parameter but the fourth builtin argument.  */
3791extern __inline __m512d
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3794 __m512d __C, const int __R)
3795{
38ef6fb1
L
3796 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
3797 (__v8df) __B,
3798 (__v8df) __C,
3799 (__mmask8) __U, __R);
756c5857
AI
3800}
3801
/* Unmasked form: calls the dedicated __builtin_ia32_vfnmsubps512_mask on
   __A, __B, __C (no operand is negated in the wrapper) with
   (__mmask16) -1 as mask; __R is forwarded unchanged.  */
3802extern __inline __m512
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3805{
38ef6fb1
L
3806 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3807 (__v16sf) __B,
3808 (__v16sf) __C,
3809 (__mmask16) -1, __R);
756c5857
AI
3810}
3811
/* Masked form: calls __builtin_ia32_vfnmsubps512_mask with __U as mask.
   Note the public parameter order (__A, __U, __B, __C) differs from the
   builtin's (A, B, C, U).  */
3812extern __inline __m512
3813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3814_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3815 __m512 __C, const int __R)
3816{
3817 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3818 (__v16sf) __B,
3819 (__v16sf) __C,
3820 (__mmask16) __U, __R);
3821}
3822
3823extern __inline __m512
3824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3825_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3826 __mmask16 __U, const int __R)
3827{
3828 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3829 (__v16sf) __B,
3830 (__v16sf) __C,
3831 (__mmask16) __U, __R);
3832}
3833
3834extern __inline __m512
3835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3836_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3837 __m512 __C, const int __R)
3838{
38ef6fb1
L
3839 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
3840 (__v16sf) __B,
3841 (__v16sf) __C,
3842 (__mmask16) __U, __R);
756c5857
AI
3843}
3844#else
/* Macro forms of the fmadd_round intrinsics.  Each one forwards to the
   matching __builtin_ia32_vfmadd{pd,ps}512_{mask,mask3,maskz} builtin;
   the unmasked forms pass -1 as the mask.  Every expansion is wrapped
   in an outer pair of parentheses so that a postfix operator applied
   to the invocation binds to the cast result, not to the builtin
   call.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
3868
/* Macro forms of the fmsub_round intrinsics.  Each one forwards to the
   matching __builtin_ia32_vfmsub{pd,ps}512_{mask,mask3,maskz} builtin;
   the unmasked forms pass -1 as the mask.  Every expansion is wrapped
   in an outer pair of parentheses so that a postfix operator applied
   to the invocation binds to the cast result, not to the builtin
   call.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_maskz (A, B, C, U, R))
756c5857
AI
3892
/* Macro forms of the fmaddsub_round intrinsics.  Each one forwards to
   the matching __builtin_ia32_vfmaddsub{pd,ps}512_{mask,mask3,maskz}
   builtin; the unmasked forms pass -1 as the mask.  Every expansion is
   wrapped in an outer pair of parentheses so that a postfix operator
   applied to the invocation binds to the cast result, not to the
   builtin call.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3916
/* Macro forms of the fmsubadd_round intrinsics.  The unmasked, mask and
   maskz forms call the vfmaddsub builtins with the third operand
   negated; the mask3 forms call the dedicated vfmsubadd*_mask3
   builtins.  Every expansion is wrapped in an outer pair of
   parentheses so that a postfix operator applied to the invocation
   binds to the cast result, not to the builtin call.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3940
/* Macro forms of the fnmadd_round intrinsics.  Each one forwards to the
   matching __builtin_ia32_vfnmadd{pd,ps}512_{mask,mask3,maskz} builtin;
   the unmasked forms pass -1 as the mask.  Every expansion is wrapped
   in an outer pair of parentheses so that a postfix operator applied
   to the invocation binds to the cast result, not to the builtin
   call.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_maskz (A, B, C, U, R))
756c5857
AI
3964
/* Macro forms of the fnmsub_round intrinsics.  Each one forwards to the
   matching __builtin_ia32_vfnmsub{pd,ps}512_{mask,mask3,maskz} builtin;
   the unmasked forms pass -1 as the mask.  Every expansion is wrapped
   in an outer pair of parentheses so that a postfix operator applied
   to the invocation binds to the cast result, not to the builtin
   call.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_maskz (A, B, C, U, R))
756c5857
AI
3988#endif
3989
3990extern __inline __m512i
3991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3992_mm512_abs_epi64 (__m512i __A)
3993{
3994 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3995 (__v8di)
4271e5cb 3996 _mm512_undefined_epi32 (),
756c5857
AI
3997 (__mmask8) -1);
3998}
3999
4000extern __inline __m512i
4001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4003{
4004 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
4005 (__v8di) __W,
4006 (__mmask8) __U);
4007}
4008
4009extern __inline __m512i
4010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4011_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
4012{
4013 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
4014 (__v8di)
4015 _mm512_setzero_si512 (),
4016 (__mmask8) __U);
4017}
4018
4019extern __inline __m512i
4020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4021_mm512_abs_epi32 (__m512i __A)
4022{
4023 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4024 (__v16si)
4271e5cb 4025 _mm512_undefined_epi32 (),
756c5857
AI
4026 (__mmask16) -1);
4027}
4028
4029extern __inline __m512i
4030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4031_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4032{
4033 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4034 (__v16si) __W,
4035 (__mmask16) __U);
4036}
4037
4038extern __inline __m512i
4039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
4041{
4042 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4043 (__v16si)
4044 _mm512_setzero_si512 (),
4045 (__mmask16) __U);
4046}
4047
4048extern __inline __m512
4049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4050_mm512_broadcastss_ps (__m128 __A)
4051{
0b192937
UD
4052 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4053 (__v16sf)
4054 _mm512_undefined_ps (),
756c5857
AI
4055 (__mmask16) -1);
4056}
4057
4058extern __inline __m512
4059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4060_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
4061{
4062 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4063 (__v16sf) __O, __M);
4064}
4065
4066extern __inline __m512
4067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4069{
4070 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4071 (__v16sf)
4072 _mm512_setzero_ps (),
4073 __M);
4074}
4075
4076extern __inline __m512d
4077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4078_mm512_broadcastsd_pd (__m128d __A)
4079{
0b192937
UD
4080 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4081 (__v8df)
4082 _mm512_undefined_pd (),
756c5857
AI
4083 (__mmask8) -1);
4084}
4085
4086extern __inline __m512d
4087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4088_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
4089{
4090 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4091 (__v8df) __O, __M);
4092}
4093
4094extern __inline __m512d
4095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
4097{
4098 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4099 (__v8df)
4100 _mm512_setzero_pd (),
4101 __M);
4102}
4103
4104extern __inline __m512i
4105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4106_mm512_broadcastd_epi32 (__m128i __A)
4107{
0b192937
UD
4108 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4109 (__v16si)
4271e5cb 4110 _mm512_undefined_epi32 (),
756c5857
AI
4111 (__mmask16) -1);
4112}
4113
4114extern __inline __m512i
4115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4116_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
4117{
4118 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4119 (__v16si) __O, __M);
4120}
4121
4122extern __inline __m512i
4123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4124_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
4125{
4126 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4127 (__v16si)
4128 _mm512_setzero_si512 (),
4129 __M);
4130}
4131
4132extern __inline __m512i
4133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134_mm512_set1_epi32 (int __A)
4135{
43373412 4136 return (__m512i)(__v16si)
4137 { __A, __A, __A, __A, __A, __A, __A, __A,
4138 __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4139}
4140
4141extern __inline __m512i
4142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4144{
4145 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4146 __M);
4147}
4148
4149extern __inline __m512i
4150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4152{
4153 return (__m512i)
4154 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4155 (__v16si) _mm512_setzero_si512 (),
4156 __M);
4157}
4158
4159extern __inline __m512i
4160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4161_mm512_broadcastq_epi64 (__m128i __A)
4162{
0b192937
UD
4163 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4164 (__v8di)
4271e5cb 4165 _mm512_undefined_epi32 (),
756c5857
AI
4166 (__mmask8) -1);
4167}
4168
4169extern __inline __m512i
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4172{
4173 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4174 (__v8di) __O, __M);
4175}
4176
4177extern __inline __m512i
4178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4179_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4180{
4181 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4182 (__v8di)
4183 _mm512_setzero_si512 (),
4184 __M);
4185}
4186
4187extern __inline __m512i
4188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189_mm512_set1_epi64 (long long __A)
4190{
43373412 4191 return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4192}
4193
4194extern __inline __m512i
4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4197{
756c5857
AI
4198 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4199 __M);
756c5857
AI
4200}
4201
4202extern __inline __m512i
4203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4204_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4205{
756c5857
AI
4206 return (__m512i)
4207 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4208 (__v8di) _mm512_setzero_si512 (),
4209 __M);
756c5857
AI
4210}
4211
4212extern __inline __m512
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm512_broadcast_f32x4 (__m128 __A)
4215{
0b192937
UD
4216 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4217 (__v16sf)
4218 _mm512_undefined_ps (),
756c5857
AI
4219 (__mmask16) -1);
4220}
4221
4222extern __inline __m512
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4225{
4226 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4227 (__v16sf) __O,
4228 __M);
4229}
4230
4231extern __inline __m512
4232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4234{
4235 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4236 (__v16sf)
4237 _mm512_setzero_ps (),
4238 __M);
4239}
4240
4241extern __inline __m512i
4242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4243_mm512_broadcast_i32x4 (__m128i __A)
4244{
756c5857 4245 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 4246 (__v16si)
4271e5cb 4247 _mm512_undefined_epi32 (),
756c5857
AI
4248 (__mmask16) -1);
4249}
4250
4251extern __inline __m512i
4252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4253_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4254{
4255 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4256 (__v16si) __O,
4257 __M);
4258}
4259
4260extern __inline __m512i
4261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4262_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4263{
4264 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4265 (__v16si)
4266 _mm512_setzero_si512 (),
4267 __M);
4268}
4269
4270extern __inline __m512d
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272_mm512_broadcast_f64x4 (__m256d __A)
4273{
756c5857 4274 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
4275 (__v8df)
4276 _mm512_undefined_pd (),
756c5857
AI
4277 (__mmask8) -1);
4278}
4279
4280extern __inline __m512d
4281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4282_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4283{
4284 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4285 (__v8df) __O,
4286 __M);
4287}
4288
4289extern __inline __m512d
4290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4291_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4292{
4293 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4294 (__v8df)
4295 _mm512_setzero_pd (),
4296 __M);
4297}
4298
4299extern __inline __m512i
4300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4301_mm512_broadcast_i64x4 (__m256i __A)
4302{
756c5857 4303 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 4304 (__v8di)
4271e5cb 4305 _mm512_undefined_epi32 (),
756c5857
AI
4306 (__mmask8) -1);
4307}
4308
4309extern __inline __m512i
4310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4311_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4312{
4313 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4314 (__v8di) __O,
4315 __M);
4316}
4317
4318extern __inline __m512i
4319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4320_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4321{
4322 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4323 (__v8di)
4324 _mm512_setzero_si512 (),
4325 __M);
4326}
4327
/* Named 8-bit constants _MM_PERM_wxyz.  Each letter A..D stands for the
   two-bit value 0..3, and the four letters are packed most-significant
   first, so the value is (w << 6) | (x << 4) | (y << 2) | z.  One row
   per three-letter prefix.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4417
4418#ifdef __OPTIMIZE__
4419extern __inline __m512i
4420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4421_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4422{
4423 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4424 __mask,
4425 (__v16si)
4271e5cb 4426 _mm512_undefined_epi32 (),
756c5857
AI
4427 (__mmask16) -1);
4428}
4429
4430extern __inline __m512i
4431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4432_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4433 _MM_PERM_ENUM __mask)
4434{
4435 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4436 __mask,
4437 (__v16si) __W,
4438 (__mmask16) __U);
4439}
4440
4441extern __inline __m512i
4442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4443_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4444{
4445 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4446 __mask,
4447 (__v16si)
4448 _mm512_setzero_si512 (),
4449 (__mmask16) __U);
4450}
4451
4452extern __inline __m512i
4453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4455{
4456 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4457 (__v8di) __B, __imm,
4458 (__v8di)
4271e5cb 4459 _mm512_undefined_epi32 (),
756c5857
AI
4460 (__mmask8) -1);
4461}
4462
4463extern __inline __m512i
4464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4465_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4466 __m512i __B, const int __imm)
4467{
4468 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4469 (__v8di) __B, __imm,
4470 (__v8di) __W,
4471 (__mmask8) __U);
4472}
4473
4474extern __inline __m512i
4475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4476_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4477 const int __imm)
4478{
4479 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4480 (__v8di) __B, __imm,
4481 (__v8di)
4482 _mm512_setzero_si512 (),
4483 (__mmask8) __U);
4484}
4485
4486extern __inline __m512i
4487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4489{
4490 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4491 (__v16si) __B,
4492 __imm,
4493 (__v16si)
4271e5cb 4494 _mm512_undefined_epi32 (),
756c5857
AI
4495 (__mmask16) -1);
4496}
4497
4498extern __inline __m512i
4499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4500_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4501 __m512i __B, const int __imm)
4502{
4503 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4504 (__v16si) __B,
4505 __imm,
4506 (__v16si) __W,
4507 (__mmask16) __U);
4508}
4509
4510extern __inline __m512i
4511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4512_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4513 const int __imm)
4514{
4515 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4516 (__v16si) __B,
4517 __imm,
4518 (__v16si)
4519 _mm512_setzero_si512 (),
4520 (__mmask16) __U);
4521}
4522
4523extern __inline __m512d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4526{
4527 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4528 (__v8df) __B, __imm,
4529 (__v8df)
0b192937 4530 _mm512_undefined_pd (),
756c5857
AI
4531 (__mmask8) -1);
4532}
4533
4534extern __inline __m512d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4537 __m512d __B, const int __imm)
4538{
4539 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4540 (__v8df) __B, __imm,
4541 (__v8df) __W,
4542 (__mmask8) __U);
4543}
4544
4545extern __inline __m512d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4548 const int __imm)
4549{
4550 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4551 (__v8df) __B, __imm,
4552 (__v8df)
4553 _mm512_setzero_pd (),
4554 (__mmask8) __U);
4555}
4556
4557extern __inline __m512
4558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4559_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4560{
4561 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4562 (__v16sf) __B, __imm,
4563 (__v16sf)
0b192937 4564 _mm512_undefined_ps (),
756c5857
AI
4565 (__mmask16) -1);
4566}
4567
4568extern __inline __m512
4569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4570_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4571 __m512 __B, const int __imm)
4572{
4573 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4574 (__v16sf) __B, __imm,
4575 (__v16sf) __W,
4576 (__mmask16) __U);
4577}
4578
4579extern __inline __m512
4580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4581_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4582 const int __imm)
4583{
4584 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4585 (__v16sf) __B, __imm,
4586 (__v16sf)
4587 _mm512_setzero_ps (),
4588 (__mmask16) __U);
4589}
4590
4591#else
/* Macro forms of the shuffle_epi32 intrinsics: X is the source vector,
   C the shuffle control, W the pass-through vector and U the write
   mask.  All forward to __builtin_ia32_pshufd512_mask.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4606
/* Macro forms of the shuffle_i64x2 intrinsics: X and Y are the source
   vectors, C the immediate, W the pass-through vector and U the write
   mask.  All forward to __builtin_ia32_shuf_i64x2_mask.  */
#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),                        \
     (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)(W),                                              \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                          \
     (__mmask8)(U)))
4624
/* Macro forms of the shuffle_i32x4 intrinsics: X and Y are the source
   vectors, C the immediate, W the pass-through vector and U the write
   mask.  All forward to __builtin_ia32_shuf_i32x4_mask.  */
#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4642
4643#define _mm512_shuffle_f64x2(X, Y, C) \
4644 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4645 (__v8df)(__m512d)(Y), (int)(C),\
0b192937 4646 (__v8df)(__m512d)_mm512_undefined_pd(),\
756c5857
AI
4647 (__mmask8)-1))
4648
/* Masked macro form: same builtin as _mm512_shuffle_f64x2, but W is passed
   as the vector operand and U as the __mmask8.  */
4649#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4650 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4651 (__v8df)(__m512d)(Y), (int)(C),\
4652 (__v8df)(__m512d)(W),\
4653 (__mmask8)(U)))
4654
/* Zero-operand macro form: same builtin as _mm512_shuffle_f64x2, with a zero
   vector operand and U as the __mmask8.  */
4655#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4656 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4657 (__v8df)(__m512d)(Y), (int)(C),\
4658 (__v8df)(__m512d)_mm512_setzero_pd(),\
4659 (__mmask8)(U)))
4660
/* Macro form: expands to __builtin_ia32_shuf_f32x4_mask on X and Y with
   control value C, an undefined vector operand and an all-ones __mmask16.  */
4661#define _mm512_shuffle_f32x4(X, Y, C) \
4662 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4663 (__v16sf)(__m512)(Y), (int)(C),\
0b192937 4664 (__v16sf)(__m512)_mm512_undefined_ps(),\
756c5857
AI
4665 (__mmask16)-1))
4666
/* Masked macro form: same builtin as _mm512_shuffle_f32x4, but W is passed
   as the vector operand and U as the __mmask16.  */
4667#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4668 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4669 (__v16sf)(__m512)(Y), (int)(C),\
4670 (__v16sf)(__m512)(W),\
4671 (__mmask16)(U)))
4672
/* Zero-operand macro form: same builtin as _mm512_shuffle_f32x4, with a zero
   vector operand and U as the __mmask16.  */
4673#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4674 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4675 (__v16sf)(__m512)(Y), (int)(C),\
4676 (__v16sf)(__m512)_mm512_setzero_ps(),\
4677 (__mmask16)(U)))
4678#endif
4679
/* Calls __builtin_ia32_prolvd512_mask on __A and __B (viewed as __v16si)
   with an undefined vector operand and an all-ones __mmask16.  */
4680extern __inline __m512i
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4683{
4684 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4685 (__v16si) __B,
4686 (__v16si)
4271e5cb 4687 _mm512_undefined_epi32 (),
756c5857
AI
4688 (__mmask16) -1);
4689}
4690
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_prolvd512_mask (presumably lanes with a clear bit in __U
   keep __W -- the builtin defines this).  */
4691extern __inline __m512i
4692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4694{
4695 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4696 (__v16si) __B,
4697 (__v16si) __W,
4698 (__mmask16) __U);
4699}
4700
/* Zero-operand variant: passes a zero vector and __U as the vector and mask
   operands of __builtin_ia32_prolvd512_mask.  */
4701extern __inline __m512i
4702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4703_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4704{
4705 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4706 (__v16si) __B,
4707 (__v16si)
4708 _mm512_setzero_si512 (),
4709 (__mmask16) __U);
4710}
4711
/* Calls __builtin_ia32_prorvd512_mask on __A and __B (viewed as __v16si)
   with an undefined vector operand and an all-ones __mmask16.  */
4712extern __inline __m512i
4713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4714_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4715{
4716 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4717 (__v16si) __B,
4718 (__v16si)
4271e5cb 4719 _mm512_undefined_epi32 (),
756c5857
AI
4720 (__mmask16) -1);
4721}
4722
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_prorvd512_mask.  */
4723extern __inline __m512i
4724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4725_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4726{
4727 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4728 (__v16si) __B,
4729 (__v16si) __W,
4730 (__mmask16) __U);
4731}
4732
/* Zero-operand variant: passes a zero vector and __U as the vector and mask
   operands of __builtin_ia32_prorvd512_mask.  */
4733extern __inline __m512i
4734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4735_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4736{
4737 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4738 (__v16si) __B,
4739 (__v16si)
4740 _mm512_setzero_si512 (),
4741 (__mmask16) __U);
4742}
4743
/* Calls __builtin_ia32_prolvq512_mask on __A and __B (viewed as __v8di)
   with an undefined vector operand and an all-ones __mmask8.  */
4744extern __inline __m512i
4745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4747{
4748 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4749 (__v8di) __B,
4750 (__v8di)
4271e5cb 4751 _mm512_undefined_epi32 (),
756c5857
AI
4752 (__mmask8) -1);
4753}
4754
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_prolvq512_mask.  */
4755extern __inline __m512i
4756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4757_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4758{
4759 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4760 (__v8di) __B,
4761 (__v8di) __W,
4762 (__mmask8) __U);
4763}
4764
/* Zero-operand variant: passes a zero vector and __U as the vector and mask
   operands of __builtin_ia32_prolvq512_mask.  */
4765extern __inline __m512i
4766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4767_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4768{
4769 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4770 (__v8di) __B,
4771 (__v8di)
4772 _mm512_setzero_si512 (),
4773 (__mmask8) __U);
4774}
4775
/* Calls __builtin_ia32_prorvq512_mask on __A and __B (viewed as __v8di)
   with an undefined vector operand and an all-ones __mmask8.  */
4776extern __inline __m512i
4777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4779{
4780 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4781 (__v8di) __B,
4782 (__v8di)
4271e5cb 4783 _mm512_undefined_epi32 (),
756c5857
AI
4784 (__mmask8) -1);
4785}
4786
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_prorvq512_mask.  */
4787extern __inline __m512i
4788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4789_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4790{
4791 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4792 (__v8di) __B,
4793 (__v8di) __W,
4794 (__mmask8) __U);
4795}
4796
/* Zero-operand variant: passes a zero vector and __U as the vector and mask
   operands of __builtin_ia32_prorvq512_mask.  */
4797extern __inline __m512i
4798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4799_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4800{
4801 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4802 (__v8di) __B,
4803 (__v8di)
4804 _mm512_setzero_si512 (),
4805 (__mmask8) __U);
4806}
4807
4808#ifdef __OPTIMIZE__
/* Calls __builtin_ia32_cvttpd2dq512_mask on __A with an undefined __v8si
   operand and an all-ones __mmask8; __R is forwarded as the last argument.  */
4809extern __inline __m256i
4810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4811_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4812{
4813 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4814 (__v8si)
0b192937 4815 _mm256_undefined_si256 (),
756c5857
AI
4816 (__mmask8) -1, __R);
4817}
4818
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvttpd2dq512_mask; __R is forwarded unchanged.  */
4819extern __inline __m256i
4820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4821_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4822 const int __R)
4823{
4824 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4825 (__v8si) __W,
4826 (__mmask8) __U, __R);
4827}
4828
/* Zero-operand variant: passes a zero __v8si and __U to
   __builtin_ia32_cvttpd2dq512_mask; __R is forwarded unchanged.  */
4829extern __inline __m256i
4830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4831_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4832{
4833 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4834 (__v8si)
4835 _mm256_setzero_si256 (),
4836 (__mmask8) __U, __R);
4837}
4838
/* Calls __builtin_ia32_cvttpd2udq512_mask on __A with an undefined __v8si
   operand and an all-ones __mmask8; __R is forwarded as the last argument.  */
4839extern __inline __m256i
4840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4841_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4842{
4843 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4844 (__v8si)
0b192937 4845 _mm256_undefined_si256 (),
756c5857
AI
4846 (__mmask8) -1, __R);
4847}
4848
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvttpd2udq512_mask; __R is forwarded unchanged.  */
4849extern __inline __m256i
4850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4851_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4852 const int __R)
4853{
4854 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4855 (__v8si) __W,
4856 (__mmask8) __U, __R);
4857}
4858
/* Zero-operand variant: passes a zero __v8si and __U to
   __builtin_ia32_cvttpd2udq512_mask; __R is forwarded unchanged.  */
4859extern __inline __m256i
4860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4861_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4862{
4863 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4864 (__v8si)
4865 _mm256_setzero_si256 (),
4866 (__mmask8) __U, __R);
4867}
4868#else
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
4869#define _mm512_cvtt_roundpd_epi32(A, B) \
0b192937 4870 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
756c5857
AI
4871
/* Non-__OPTIMIZE__ macro form: W (cast to __v8si) and U are the vector and
   mask operands.  */
4872#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4873 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4874
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
4875#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4876 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4877
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
4878#define _mm512_cvtt_roundpd_epu32(A, B) \
0b192937 4879 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
756c5857
AI
4880
/* Non-__OPTIMIZE__ macro form: W (cast to __v8si) and U are the vector and
   mask operands.  */
4881#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4882 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4883
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
4884#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4885 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4886#endif
4887
4888#ifdef __OPTIMIZE__
/* Calls __builtin_ia32_cvtpd2dq512_mask on __A with an undefined __v8si
   operand and an all-ones __mmask8; __R is forwarded as the last argument.  */
4889extern __inline __m256i
4890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4891_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4892{
4893 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4894 (__v8si)
0b192937 4895 _mm256_undefined_si256 (),
756c5857
AI
4896 (__mmask8) -1, __R);
4897}
4898
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvtpd2dq512_mask; __R is forwarded unchanged.  */
4899extern __inline __m256i
4900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4901_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4902 const int __R)
4903{
4904 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4905 (__v8si) __W,
4906 (__mmask8) __U, __R);
4907}
4908
/* Zero-operand variant: passes a zero __v8si and __U to
   __builtin_ia32_cvtpd2dq512_mask; __R is forwarded unchanged.  */
4909extern __inline __m256i
4910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4911_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4912{
4913 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4914 (__v8si)
4915 _mm256_setzero_si256 (),
4916 (__mmask8) __U, __R);
4917}
4918
/* Calls __builtin_ia32_cvtpd2udq512_mask on __A with an undefined __v8si
   operand and an all-ones __mmask8; __R is forwarded as the last argument.  */
4919extern __inline __m256i
4920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4922{
4923 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4924 (__v8si)
0b192937 4925 _mm256_undefined_si256 (),
756c5857
AI
4926 (__mmask8) -1, __R);
4927}
4928
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvtpd2udq512_mask; __R is forwarded unchanged.  */
4929extern __inline __m256i
4930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4931_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4932 const int __R)
4933{
4934 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4935 (__v8si) __W,
4936 (__mmask8) __U, __R);
4937}
4938
/* Zero-operand variant: passes a zero __v8si and __U to
   __builtin_ia32_cvtpd2udq512_mask; __R is forwarded unchanged.  */
4939extern __inline __m256i
4940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4941_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4942{
4943 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4944 (__v8si)
4945 _mm256_setzero_si256 (),
4946 (__mmask8) __U, __R);
4947}
4948#else
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
4949#define _mm512_cvt_roundpd_epi32(A, B) \
0b192937 4950 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
756c5857
AI
4951
/* Non-__OPTIMIZE__ macro form: W (cast to __v8si) and U are the vector and
   mask operands.  */
4952#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4953 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4954
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
4955#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4956 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4957
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
4958#define _mm512_cvt_roundpd_epu32(A, B) \
0b192937 4959 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
756c5857
AI
4960
/* Non-__OPTIMIZE__ macro form: W (cast to __v8si) and U are the vector and
   mask operands.  */
4961#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4962 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4963
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
4964#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4965 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4966#endif
4967
4968#ifdef __OPTIMIZE__
/* Calls __builtin_ia32_cvttps2dq512_mask on __A with an undefined __v16si
   operand and an all-ones __mmask16; __R is forwarded as the last
   argument.  */
4969extern __inline __m512i
4970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4972{
4973 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4974 (__v16si)
4271e5cb 4975 _mm512_undefined_epi32 (),
756c5857
AI
4976 (__mmask16) -1, __R);
4977}
4978
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvttps2dq512_mask; __R is forwarded unchanged.  */
4979extern __inline __m512i
4980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4981_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4982 const int __R)
4983{
4984 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4985 (__v16si) __W,
4986 (__mmask16) __U, __R);
4987}
4988
/* Zero-operand variant: passes a zero __v16si and __U to
   __builtin_ia32_cvttps2dq512_mask; __R is forwarded unchanged.  */
4989extern __inline __m512i
4990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4991_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4992{
4993 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4994 (__v16si)
4995 _mm512_setzero_si512 (),
4996 (__mmask16) __U, __R);
4997}
4998
/* Calls __builtin_ia32_cvttps2udq512_mask on __A with an undefined __v16si
   operand and an all-ones __mmask16; __R is forwarded as the last
   argument.  */
4999extern __inline __m512i
5000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5001_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
5002{
5003 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5004 (__v16si)
4271e5cb 5005 _mm512_undefined_epi32 (),
756c5857
AI
5006 (__mmask16) -1, __R);
5007}
5008
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvttps2udq512_mask; __R is forwarded unchanged.  */
5009extern __inline __m512i
5010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
5012 const int __R)
5013{
5014 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5015 (__v16si) __W,
5016 (__mmask16) __U, __R);
5017}
5018
/* Zero-operand variant: passes a zero __v16si and __U to
   __builtin_ia32_cvttps2udq512_mask; __R is forwarded unchanged.  */
5019extern __inline __m512i
5020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
5022{
5023 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5024 (__v16si)
5025 _mm512_setzero_si512 (),
5026 (__mmask16) __U, __R);
5027}
5028#else
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
5029#define _mm512_cvtt_roundps_epi32(A, B) \
4271e5cb 5030 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
756c5857
AI
5031
/* Non-__OPTIMIZE__ macro form: W (cast to __v16si) and U are the vector and
   mask operands.  */
5032#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
5033 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
5034
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
5035#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
5036 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
5037
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
5038#define _mm512_cvtt_roundps_epu32(A, B) \
4271e5cb 5039 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
756c5857
AI
5040
/* Non-__OPTIMIZE__ macro form: W (cast to __v16si) and U are the vector and
   mask operands.  */
5041#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
5042 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
5043
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
5044#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
5045 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
5046#endif
5047
5048#ifdef __OPTIMIZE__
/* Calls __builtin_ia32_cvtps2dq512_mask on __A with an undefined __v16si
   operand and an all-ones __mmask16; __R is forwarded as the last
   argument.  */
5049extern __inline __m512i
5050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5051_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
5052{
5053 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5054 (__v16si)
4271e5cb 5055 _mm512_undefined_epi32 (),
756c5857
AI
5056 (__mmask16) -1, __R);
5057}
5058
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvtps2dq512_mask; __R is forwarded unchanged.  */
5059extern __inline __m512i
5060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5061_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
5062 const int __R)
5063{
5064 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5065 (__v16si) __W,
5066 (__mmask16) __U, __R);
5067}
5068
/* Zero-operand variant: passes a zero __v16si and __U to
   __builtin_ia32_cvtps2dq512_mask; __R is forwarded unchanged.  */
5069extern __inline __m512i
5070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5071_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
5072{
5073 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5074 (__v16si)
5075 _mm512_setzero_si512 (),
5076 (__mmask16) __U, __R);
5077}
5078
/* Calls __builtin_ia32_cvtps2udq512_mask on __A with an undefined __v16si
   operand and an all-ones __mmask16; __R is forwarded as the last
   argument.  */
5079extern __inline __m512i
5080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5081_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
5082{
5083 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5084 (__v16si)
4271e5cb 5085 _mm512_undefined_epi32 (),
756c5857
AI
5086 (__mmask16) -1, __R);
5087}
5088
/* Masked variant: passes __W and __U as the vector and mask operands of
   __builtin_ia32_cvtps2udq512_mask; __R is forwarded unchanged.  */
5089extern __inline __m512i
5090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5091_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
5092 const int __R)
5093{
5094 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5095 (__v16si) __W,
5096 (__mmask16) __U, __R);
5097}
5098
/* Zero-operand variant: passes a zero __v16si and __U to
   __builtin_ia32_cvtps2udq512_mask; __R is forwarded unchanged.  */
5099extern __inline __m512i
5100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
5102{
5103 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5104 (__v16si)
5105 _mm512_setzero_si512 (),
5106 (__mmask16) __U, __R);
5107}
5108#else
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
5109#define _mm512_cvt_roundps_epi32(A, B) \
4271e5cb 5110 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
756c5857
AI
5111
/* Non-__OPTIMIZE__ macro form: W (cast to __v16si) and U are the vector and
   mask operands.  */
5112#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
5113 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
5114
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
5115#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
5116 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
5117
/* Non-__OPTIMIZE__ macro form: undefined vector operand, mask -1; A and B
   are passed to the builtin uncast.  */
5118#define _mm512_cvt_roundps_epu32(A, B) \
4271e5cb 5119 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
756c5857
AI
5120
/* Non-__OPTIMIZE__ macro form: W (cast to __v16si) and U are the vector and
   mask operands.  */
5121#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
5122 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
5123
/* Non-__OPTIMIZE__ macro form: zero vector operand with mask U.  */
5124#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
5125 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
5126#endif
5127
/* Forwards __A (as __v2df) and the unsigned __B to
   __builtin_ia32_cvtusi2sd32; takes no rounding argument.  */
5128extern __inline __m128d
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm_cvtu32_sd (__m128d __A, unsigned __B)
5131{
5132 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5133}
5134
5135#ifdef __x86_64__
5136#ifdef __OPTIMIZE__
/* Forwards __A (as __v2df), the unsigned 64-bit __B and __R to
   __builtin_ia32_cvtusi2sd64.  Only compiled under __x86_64__.  */
5137extern __inline __m128d
5138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
5140{
5141 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
5142}
5143
/* Forwards __A (as __v2df), the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2sd64 (same builtin as _mm_cvt_roundsi64_sd).  */
5144extern __inline __m128d
5145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5146_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
5147{
5148 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5149}
5150
/* Forwards __A (as __v2df), the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2sd64 (same builtin as _mm_cvt_roundi64_sd).  */
5151extern __inline __m128d
5152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5153_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
5154{
5155 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5156}
5157#else
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundu64_sd.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5158#define _mm_cvt_roundu64_sd(A, B, C) \
5159 ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))
5160
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundi64_sd.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5161#define _mm_cvt_roundi64_sd(A, B, C) \
5162 ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
5163
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundsi64_sd.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5164#define _mm_cvt_roundsi64_sd(A, B, C) \
5165 ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
5166#endif
5167
5168#endif
5169
5170#ifdef __OPTIMIZE__
/* Forwards __A (as __v4sf), the unsigned __B and __R to
   __builtin_ia32_cvtusi2ss32.  */
5171extern __inline __m128
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
5174{
5175 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
5176}
5177
/* Forwards __A (as __v4sf), the int __B and __R to
   __builtin_ia32_cvtsi2ss32 (same builtin as _mm_cvt_roundi32_ss).  */
5178extern __inline __m128
5179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5180_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
5181{
5182 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5183}
5184
/* Forwards __A (as __v4sf), the int __B and __R to
   __builtin_ia32_cvtsi2ss32 (same builtin as _mm_cvt_roundsi32_ss).  */
5185extern __inline __m128
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
5188{
5189 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5190}
5191#else
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundu32_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5192#define _mm_cvt_roundu32_ss(A, B, C) \
5193 ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))
5194
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundi32_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5195#define _mm_cvt_roundi32_ss(A, B, C) \
5196 ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
5197
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundsi32_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5198#define _mm_cvt_roundsi32_ss(A, B, C) \
5199 ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
5200#endif
5201
5202#ifdef __x86_64__
5203#ifdef __OPTIMIZE__
/* Forwards __A (as __v4sf), the unsigned 64-bit __B and __R to
   __builtin_ia32_cvtusi2ss64.  Only compiled under __x86_64__.  */
5204extern __inline __m128
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
5207{
5208 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
5209}
5210
/* Forwards __A (as __v4sf), the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2ss64 (same builtin as _mm_cvt_roundi64_ss).  */
5211extern __inline __m128
5212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
5214{
5215 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5216}
5217
/* Forwards __A (as __v4sf), the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2ss64 (same builtin as _mm_cvt_roundsi64_ss).  */
5218extern __inline __m128
5219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5220_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
5221{
5222 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5223}
5224#else
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundu64_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5225#define _mm_cvt_roundu64_ss(A, B, C) \
5226 ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))
5227
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundi64_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5228#define _mm_cvt_roundi64_ss(A, B, C) \
5229 ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
5230
/* Non-__OPTIMIZE__ macro form of _mm_cvt_roundsi64_ss.  The expansion is
   fully parenthesized so that a postfix operator at the use site applies to
   the cast result, as it does with the inline-function form.  */
5231#define _mm_cvt_roundsi64_ss(A, B, C) \
5232 ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
5233#endif
5234
5235#endif
5236
/* Calls __builtin_ia32_pmovdb512_mask on __A (as __v16si) with an undefined
   __v16qi operand and an all-ones __mmask16; returns the result as
   __m128i.  */
5237extern __inline __m128i
5238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239_mm512_cvtepi32_epi8 (__m512i __A)
5240{
0b192937
UD
5241 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5242 (__v16qi)
5243 _mm_undefined_si128 (),
756c5857
AI
5244 (__mmask16) -1);
5245}
5246
d256b866
IT
/* Passes __P (cast to __v16qi *), __A and mask __M to
   __builtin_ia32_pmovdb512mem_mask; returns nothing (presumably a masked
   store through __P -- the builtin defines this).  */
5247extern __inline void
5248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5249_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5250{
5251 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5252}
5253
756c5857
AI
/* Masked variant: passes __O (as __v16qi) and __M as the vector and mask
   operands of __builtin_ia32_pmovdb512_mask.  */
5254extern __inline __m128i
5255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5256_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5257{
5258 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5259 (__v16qi) __O, __M);
5260}
5261
/* Zero-operand variant: passes a zero __v16qi and __M to
   __builtin_ia32_pmovdb512_mask.  */
5262extern __inline __m128i
5263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5264_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5265{
5266 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5267 (__v16qi)
5268 _mm_setzero_si128 (),
5269 __M);
5270}
5271
/* Calls __builtin_ia32_pmovsdb512_mask on __A (as __v16si) with an undefined
   __v16qi operand and an all-ones __mmask16.  */
5272extern __inline __m128i
5273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274_mm512_cvtsepi32_epi8 (__m512i __A)
5275{
0b192937
UD
5276 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5277 (__v16qi)
5278 _mm_undefined_si128 (),
756c5857
AI
5279 (__mmask16) -1);
5280}
5281
d256b866
IT
/* Passes __P (cast to __v16qi *), __A and mask __M to
   __builtin_ia32_pmovsdb512mem_mask; returns nothing.  */
5282extern __inline void
5283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5284_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5285{
5286 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5287}
5288
756c5857
AI
/* Masked variant: passes __O (as __v16qi) and __M as the vector and mask
   operands of __builtin_ia32_pmovsdb512_mask.  */
5289extern __inline __m128i
5290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5291_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5292{
5293 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5294 (__v16qi) __O, __M);
5295}
5296
/* Zero-operand variant: passes a zero __v16qi and __M to
   __builtin_ia32_pmovsdb512_mask.  */
5297extern __inline __m128i
5298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5299_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5300{
5301 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5302 (__v16qi)
5303 _mm_setzero_si128 (),
5304 __M);
5305}
5306
/* Calls __builtin_ia32_pmovusdb512_mask on __A (as __v16si) with an
   undefined __v16qi operand and an all-ones __mmask16.  */
5307extern __inline __m128i
5308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5309_mm512_cvtusepi32_epi8 (__m512i __A)
5310{
0b192937
UD
5311 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5312 (__v16qi)
5313 _mm_undefined_si128 (),
756c5857
AI
5314 (__mmask16) -1);
5315}
5316
d256b866
IT
/* Passes __P (cast to __v16qi *), __A and mask __M to
   __builtin_ia32_pmovusdb512mem_mask; returns nothing.  */
5317extern __inline void
5318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5319_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5320{
5321 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5322}
5323
756c5857
AI
/* Masked variant: passes __O (as __v16qi) and __M as the vector and mask
   operands of __builtin_ia32_pmovusdb512_mask.  */
5324extern __inline __m128i
5325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5327{
5328 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5329 (__v16qi) __O,
5330 __M);
5331}
5332
/* Zero-operand variant: passes a zero __v16qi and __M to
   __builtin_ia32_pmovusdb512_mask.  */
5333extern __inline __m128i
5334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5335_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5336{
5337 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5338 (__v16qi)
5339 _mm_setzero_si128 (),
5340 __M);
5341}
5342
/* Calls __builtin_ia32_pmovdw512_mask on __A (as __v16si) with an undefined
   __v16hi operand and an all-ones __mmask16; returns the result as
   __m256i.  */
5343extern __inline __m256i
5344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345_mm512_cvtepi32_epi16 (__m512i __A)
5346{
0b192937
UD
5347 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5348 (__v16hi)
5349 _mm256_undefined_si256 (),
756c5857
AI
5350 (__mmask16) -1);
5351}
5352
d256b866
IT
/* Passes __P (cast to __v16hi *), __A and mask __M to
   __builtin_ia32_pmovdw512mem_mask; returns nothing.  */
5353extern __inline void
5354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5355_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5356{
5357 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5358}
5359
756c5857
AI
/* Masked variant: passes __O (as __v16hi) and __M as the vector and mask
   operands of __builtin_ia32_pmovdw512_mask.  */
5360extern __inline __m256i
5361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5363{
5364 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5365 (__v16hi) __O, __M);
5366}
5367
/* Zero-operand variant: passes a zero __v16hi and __M to
   __builtin_ia32_pmovdw512_mask.  */
5368extern __inline __m256i
5369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5371{
5372 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5373 (__v16hi)
5374 _mm256_setzero_si256 (),
5375 __M);
5376}
5377
/* Calls __builtin_ia32_pmovsdw512_mask on __A (as __v16si) with an undefined
   __v16hi operand and an all-ones __mmask16.  */
5378extern __inline __m256i
5379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5380_mm512_cvtsepi32_epi16 (__m512i __A)
5381{
0b192937
UD
5382 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5383 (__v16hi)
5384 _mm256_undefined_si256 (),
756c5857
AI
5385 (__mmask16) -1);
5386}
5387
d256b866
IT
/* Passes __P (cast to __v16hi *), __A and mask __M to
   __builtin_ia32_pmovsdw512mem_mask; returns nothing.  */
5388extern __inline void
5389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5391{
5392 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5393}
5394
756c5857
AI
/* Masked variant: passes __O (as __v16hi) and __M as the vector and mask
   operands of __builtin_ia32_pmovsdw512_mask.  */
5395extern __inline __m256i
5396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5397_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5398{
5399 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5400 (__v16hi) __O, __M);
5401}
5402
/* Zero-operand variant: passes a zero __v16hi and __M to
   __builtin_ia32_pmovsdw512_mask.  */
5403extern __inline __m256i
5404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5405_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5406{
5407 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5408 (__v16hi)
5409 _mm256_setzero_si256 (),
5410 __M);
5411}
5412
/* Calls __builtin_ia32_pmovusdw512_mask on __A (as __v16si) with an
   undefined __v16hi operand and an all-ones __mmask16.  */
5413extern __inline __m256i
5414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415_mm512_cvtusepi32_epi16 (__m512i __A)
5416{
0b192937
UD
5417 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5418 (__v16hi)
5419 _mm256_undefined_si256 (),
756c5857
AI
5420 (__mmask16) -1);
5421}
5422
d256b866
IT
/* Passes __P (cast to __v16hi *), __A and mask __M to
   __builtin_ia32_pmovusdw512mem_mask; returns nothing.  */
5423extern __inline void
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5426{
5427 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5428}
5429
756c5857
AI
/* Masked variant: passes __O (as __v16hi) and __M as the vector and mask
   operands of __builtin_ia32_pmovusdw512_mask.  */
5430extern __inline __m256i
5431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5433{
5434 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5435 (__v16hi) __O,
5436 __M);
5437}
5438
/* Zero-operand variant: passes a zero __v16hi and __M to
   __builtin_ia32_pmovusdw512_mask.  */
5439extern __inline __m256i
5440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5441_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5442{
5443 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5444 (__v16hi)
5445 _mm256_setzero_si256 (),
5446 __M);
5447}
5448
/* Calls __builtin_ia32_pmovqd512_mask on __A (as __v8di) with an undefined
   __v8si operand and an all-ones __mmask8; returns the result as __m256i.  */
5449extern __inline __m256i
5450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5451_mm512_cvtepi64_epi32 (__m512i __A)
5452{
0b192937
UD
5453 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5454 (__v8si)
5455 _mm256_undefined_si256 (),
756c5857
AI
5456 (__mmask8) -1);
5457}
5458
d256b866
IT
/* Passes __P (cast to __v8si *), __A and mask __M to
   __builtin_ia32_pmovqd512mem_mask; returns nothing.  */
5459extern __inline void
5460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5461_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5462{
5463 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5464}
5465
756c5857
AI
/* Masked variant: passes __O (as __v8si) and __M as the vector and mask
   operands of __builtin_ia32_pmovqd512_mask.  */
5466extern __inline __m256i
5467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5468_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5469{
5470 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5471 (__v8si) __O, __M);
5472}
5473
/* Zero-operand variant: passes a zero __v8si and __M to
   __builtin_ia32_pmovqd512_mask.  */
5474extern __inline __m256i
5475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5476_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5477{
5478 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5479 (__v8si)
5480 _mm256_setzero_si256 (),
5481 __M);
5482}
5483
/* Calls __builtin_ia32_pmovsqd512_mask on __A (as __v8di) with an undefined
   __v8si operand and an all-ones __mmask8.  */
5484extern __inline __m256i
5485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5486_mm512_cvtsepi64_epi32 (__m512i __A)
5487{
0b192937
UD
5488 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5489 (__v8si)
5490 _mm256_undefined_si256 (),
756c5857
AI
5491 (__mmask8) -1);
5492}
5493
d256b866
IT
/* Passes __P (cast to __v8si *), __A and mask __M to
   __builtin_ia32_pmovsqd512mem_mask; returns nothing.  */
5494extern __inline void
5495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5496_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5497{
5498 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5499}
5500
756c5857
AI
/* Masked variant: passes __O (as __v8si) and __M as the vector and mask
   operands of __builtin_ia32_pmovsqd512_mask.  */
5501extern __inline __m256i
5502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5504{
5505 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5506 (__v8si) __O, __M);
5507}
5508
/* Zero-operand variant: passes a zero __v8si and __M to
   __builtin_ia32_pmovsqd512_mask.  */
5509extern __inline __m256i
5510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5511_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5512{
5513 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5514 (__v8si)
5515 _mm256_setzero_si256 (),
5516 __M);
5517}
5518
/* Calls __builtin_ia32_pmovusqd512_mask on __A (as __v8di) with an undefined
   __v8si operand and an all-ones __mmask8.  */
5519extern __inline __m256i
5520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5521_mm512_cvtusepi64_epi32 (__m512i __A)
5522{
0b192937
UD
5523 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5524 (__v8si)
5525 _mm256_undefined_si256 (),
756c5857
AI
5526 (__mmask8) -1);
5527}
5528
/* Passes __P (cast to __v8si *), __A and mask __M to
   __builtin_ia32_pmovusqd512mem_mask; returns nothing.  */
6fb82517 5529extern __inline void
d256b866
IT
5530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5531_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5532{
5533 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5534}
5535
756c5857
AI
/* Masked variant: passes __O (as __v8si) and __M as the vector and mask
   operands of __builtin_ia32_pmovusqd512_mask.  */
5536extern __inline __m256i
5537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5538_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5539{
5540 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5541 (__v8si) __O, __M);
5542}
5543
/* Zero-operand variant: passes a zero __v8si and __M to
   __builtin_ia32_pmovusqd512_mask.  */
5544extern __inline __m256i
5545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5546_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5547{
5548 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5549 (__v8si)
5550 _mm256_setzero_si256 (),
5551 __M);
5552}
5553
/* Calls __builtin_ia32_pmovqw512_mask on __A (as __v8di) with an undefined
   __v8hi operand and an all-ones __mmask8; returns the result as __m128i.  */
5554extern __inline __m128i
5555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5556_mm512_cvtepi64_epi16 (__m512i __A)
5557{
0b192937
UD
5558 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5559 (__v8hi)
5560 _mm_undefined_si128 (),
756c5857
AI
5561 (__mmask8) -1);
5562}
5563
d256b866
IT
/* Passes __P (cast to __v8hi *), __A and mask __M to
   __builtin_ia32_pmovqw512mem_mask; returns nothing.  */
5564extern __inline void
5565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5567{
5568 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5569}
5570
756c5857
AI
/* Masked variant: passes __O (as __v8hi) and __M as the vector and mask
   operands of __builtin_ia32_pmovqw512_mask.  */
5571extern __inline __m128i
5572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5573_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5574{
5575 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5576 (__v8hi) __O, __M);
5577}
5578
5579extern __inline __m128i
5580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5581_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5582{
5583 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5584 (__v8hi)
5585 _mm_setzero_si128 (),
5586 __M);
5587}
5588
5589extern __inline __m128i
5590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591_mm512_cvtsepi64_epi16 (__m512i __A)
5592{
0b192937
UD
5593 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5594 (__v8hi)
5595 _mm_undefined_si128 (),
756c5857
AI
5596 (__mmask8) -1);
5597}
5598
d256b866
IT
5599extern __inline void
5600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5602{
5603 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5604}
5605
756c5857
AI
5606extern __inline __m128i
5607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5608_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5609{
5610 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5611 (__v8hi) __O, __M);
5612}
5613
5614extern __inline __m128i
5615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5616_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5617{
5618 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5619 (__v8hi)
5620 _mm_setzero_si128 (),
5621 __M);
5622}
5623
5624extern __inline __m128i
5625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5626_mm512_cvtusepi64_epi16 (__m512i __A)
5627{
0b192937
UD
5628 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5629 (__v8hi)
5630 _mm_undefined_si128 (),
756c5857
AI
5631 (__mmask8) -1);
5632}
5633
d256b866
IT
5634extern __inline void
5635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5637{
5638 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5639}
5640
756c5857
AI
5641extern __inline __m128i
5642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5643_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5644{
5645 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5646 (__v8hi) __O, __M);
5647}
5648
5649extern __inline __m128i
5650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5651_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5652{
5653 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5654 (__v8hi)
5655 _mm_setzero_si128 (),
5656 __M);
5657}
5658
5659extern __inline __m128i
5660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661_mm512_cvtepi64_epi8 (__m512i __A)
5662{
0b192937
UD
5663 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5664 (__v16qi)
5665 _mm_undefined_si128 (),
756c5857
AI
5666 (__mmask8) -1);
5667}
5668
d256b866
IT
5669extern __inline void
5670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5672{
4a948703 5673 __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
5674 (__v8di) __A, __M);
d256b866
IT
5675}
5676
756c5857
AI
5677extern __inline __m128i
5678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5680{
5681 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5682 (__v16qi) __O, __M);
5683}
5684
5685extern __inline __m128i
5686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5688{
5689 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5690 (__v16qi)
5691 _mm_setzero_si128 (),
5692 __M);
5693}
5694
5695extern __inline __m128i
5696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5697_mm512_cvtsepi64_epi8 (__m512i __A)
5698{
0b192937
UD
5699 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5700 (__v16qi)
5701 _mm_undefined_si128 (),
756c5857
AI
5702 (__mmask8) -1);
5703}
5704
d256b866
IT
5705extern __inline void
5706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5708{
4a948703 5709 __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5710}
5711
756c5857
AI
5712extern __inline __m128i
5713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5714_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5715{
5716 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5717 (__v16qi) __O, __M);
5718}
5719
5720extern __inline __m128i
5721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5723{
5724 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5725 (__v16qi)
5726 _mm_setzero_si128 (),
5727 __M);
5728}
5729
5730extern __inline __m128i
5731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5732_mm512_cvtusepi64_epi8 (__m512i __A)
5733{
0b192937
UD
5734 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5735 (__v16qi)
5736 _mm_undefined_si128 (),
756c5857
AI
5737 (__mmask8) -1);
5738}
5739
d256b866
IT
5740extern __inline void
5741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5742_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5743{
4a948703 5744 __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5745}
5746
756c5857
AI
5747extern __inline __m128i
5748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5750{
5751 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5752 (__v16qi) __O,
5753 __M);
5754}
5755
5756extern __inline __m128i
5757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5759{
5760 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5761 (__v16qi)
5762 _mm_setzero_si128 (),
5763 __M);
5764}
5765
5766extern __inline __m512d
5767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5768_mm512_cvtepi32_pd (__m256i __A)
5769{
5770 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5771 (__v8df)
0b192937 5772 _mm512_undefined_pd (),
756c5857
AI
5773 (__mmask8) -1);
5774}
5775
5776extern __inline __m512d
5777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5779{
5780 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5781 (__v8df) __W,
5782 (__mmask8) __U);
5783}
5784
5785extern __inline __m512d
5786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5787_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5788{
5789 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5790 (__v8df)
5791 _mm512_setzero_pd (),
5792 (__mmask8) __U);
5793}
5794
5795extern __inline __m512d
5796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5797_mm512_cvtepu32_pd (__m256i __A)
5798{
5799 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5800 (__v8df)
0b192937 5801 _mm512_undefined_pd (),
756c5857
AI
5802 (__mmask8) -1);
5803}
5804
5805extern __inline __m512d
5806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5807_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5808{
5809 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5810 (__v8df) __W,
5811 (__mmask8) __U);
5812}
5813
5814extern __inline __m512d
5815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5816_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5817{
5818 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5819 (__v8df)
5820 _mm512_setzero_pd (),
5821 (__mmask8) __U);
5822}
5823
5824#ifdef __OPTIMIZE__
5825extern __inline __m512
5826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5827_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5828{
5829 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5830 (__v16sf)
0b192937 5831 _mm512_undefined_ps (),
756c5857
AI
5832 (__mmask16) -1, __R);
5833}
5834
5835extern __inline __m512
5836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5838 const int __R)
5839{
5840 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5841 (__v16sf) __W,
5842 (__mmask16) __U, __R);
5843}
5844
5845extern __inline __m512
5846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5847_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5848{
5849 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5850 (__v16sf)
5851 _mm512_setzero_ps (),
5852 (__mmask16) __U, __R);
5853}
5854
5855extern __inline __m512
5856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5857_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5858{
5859 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5860 (__v16sf)
0b192937 5861 _mm512_undefined_ps (),
756c5857
AI
5862 (__mmask16) -1, __R);
5863}
5864
5865extern __inline __m512
5866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5868 const int __R)
5869{
5870 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5871 (__v16sf) __W,
5872 (__mmask16) __U, __R);
5873}
5874
5875extern __inline __m512
5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5878{
5879 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5880 (__v16sf)
5881 _mm512_setzero_ps (),
5882 (__mmask16) __U, __R);
5883}
5884
5885#else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole
   expansion and the B argument are parenthesized so the macro is a
   single primary expression at the use site; the mask is spelled
   (__mmask16) -1 like the inline version.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, (B)))
756c5857
AI
5888
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument and
   the whole expansion are parenthesized; W and U get the same casts the
   inline version applies to __W and __U.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) (W), \
                                             (__mmask16) (U), (B)))
5891
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument and
   the whole expansion are parenthesized; U gets the same cast the
   inline version applies to __U.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (U), (B)))
5894
/* Macro form used when __OPTIMIZE__ is not defined.  The whole
   expansion and the B argument are parenthesized so the macro is a
   single primary expression at the use site; the mask is spelled
   (__mmask16) -1 like the inline version.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, (B)))
756c5857
AI
5897
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument and
   the whole expansion are parenthesized; W and U get the same casts the
   inline version applies to __W and __U.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) (W), \
                                              (__mmask16) (U), (B)))
5900
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument and
   the whole expansion are parenthesized; U gets the same cast the
   inline version applies to __U.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16) (U), (B)))
5903#endif
5904
5905#ifdef __OPTIMIZE__
5906extern __inline __m256d
5907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5908_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5909{
5910 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5911 __imm,
5912 (__v4df)
0b192937 5913 _mm256_undefined_pd (),
756c5857
AI
5914 (__mmask8) -1);
5915}
5916
5917extern __inline __m256d
5918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5919_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5920 const int __imm)
5921{
5922 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5923 __imm,
5924 (__v4df) __W,
5925 (__mmask8) __U);
5926}
5927
5928extern __inline __m256d
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5931{
5932 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5933 __imm,
5934 (__v4df)
5935 _mm256_setzero_pd (),
5936 (__mmask8) __U);
5937}
5938
5939extern __inline __m128
5940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5941_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5942{
5943 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5944 __imm,
5945 (__v4sf)
0b192937 5946 _mm_undefined_ps (),
756c5857
AI
5947 (__mmask8) -1);
5948}
5949
5950extern __inline __m128
5951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5953 const int __imm)
5954{
5955 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5956 __imm,
5957 (__v4sf) __W,
5958 (__mmask8) __U);
5959}
5960
5961extern __inline __m128
5962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5963_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5964{
5965 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5966 __imm,
5967 (__v4sf)
5968 _mm_setzero_ps (),
5969 (__mmask8) __U);
5970}
5971
5972extern __inline __m256i
5973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5974_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5975{
5976 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5977 __imm,
5978 (__v4di)
0b192937 5979 _mm256_undefined_si256 (),
756c5857
AI
5980 (__mmask8) -1);
5981}
5982
5983extern __inline __m256i
5984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5985_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5986 const int __imm)
5987{
5988 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5989 __imm,
5990 (__v4di) __W,
5991 (__mmask8) __U);
5992}
5993
5994extern __inline __m256i
5995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5997{
5998 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5999 __imm,
6000 (__v4di)
6001 _mm256_setzero_si256 (),
6002 (__mmask8) __U);
6003}
6004
6005extern __inline __m128i
6006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6007_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
6008{
6009 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6010 __imm,
6011 (__v4si)
0b192937 6012 _mm_undefined_si128 (),
756c5857
AI
6013 (__mmask8) -1);
6014}
6015
6016extern __inline __m128i
6017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6018_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
6019 const int __imm)
6020{
6021 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6022 __imm,
6023 (__v4si) __W,
6024 (__mmask8) __U);
6025}
6026
6027extern __inline __m128i
6028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
6030{
6031 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6032 __imm,
6033 (__v4si)
6034 _mm_setzero_si128 (),
6035 (__mmask8) __U);
6036}
6037#else
6038
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                               (int) (C), \
                                               (__v4df) (__m256d) _mm256_undefined_pd (), \
                                               (__mmask8) -1))
6044
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                               (int) (C), \
                                               (__v4df) (__m256d) (W), \
                                               (__mmask8) (U)))
6050
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                               (int) (C), \
                                               (__v4df) (__m256d) _mm256_setzero_pd (), \
                                               (__mmask8) (U)))
6056
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                              (int) (C), \
                                              (__v4sf) (__m128) _mm_undefined_ps (), \
                                              (__mmask8) -1))
6062
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                              (int) (C), \
                                              (__v4sf) (__m128) (W), \
                                              (__mmask8) (U)))
6068
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                              (int) (C), \
                                              (__v4sf) (__m128) _mm_setzero_ps (), \
                                              (__mmask8) (U)))
6074
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4di) (__m256i) _mm256_undefined_si256 (), \
                                               (__mmask8) -1))
6080
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4di) (__m256i) (W), \
                                               (__mmask8) (U)))
6086
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4di) (__m256i) _mm256_setzero_si256 (), \
                                               (__mmask8) (U)))
6092
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4si) (__m128i) _mm_undefined_si128 (), \
                                               (__mmask8) -1))
6098
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4si) (__m128i) (W), \
                                               (__mmask8) (U)))
6104
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                               (int) (C), \
                                               (__v4si) (__m128i) _mm_setzero_si128 (), \
                                               (__mmask8) (U)))
6110#endif
6111
6112#ifdef __OPTIMIZE__
6113extern __inline __m512i
6114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6116{
6117 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6118 (__v4si) __B,
6119 __imm,
6120 (__v16si) __A, -1);
6121}
6122
6123extern __inline __m512
6124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6126{
6127 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6128 (__v4sf) __B,
6129 __imm,
6130 (__v16sf) __A, -1);
6131}
6132
6133extern __inline __m512i
6134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6136{
6137 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6138 (__v4di) __B,
6139 __imm,
6140 (__v8di)
4271e5cb 6141 _mm512_undefined_epi32 (),
756c5857
AI
6142 (__mmask8) -1);
6143}
6144
6145extern __inline __m512i
6146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6147_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6148 __m256i __B, const int __imm)
6149{
6150 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6151 (__v4di) __B,
6152 __imm,
6153 (__v8di) __W,
6154 (__mmask8) __U);
6155}
6156
6157extern __inline __m512i
6158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6159_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6160 const int __imm)
6161{
6162 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6163 (__v4di) __B,
6164 __imm,
6165 (__v8di)
6166 _mm512_setzero_si512 (),
6167 (__mmask8) __U);
6168}
6169
6170extern __inline __m512d
6171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6173{
6174 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6175 (__v4df) __B,
6176 __imm,
6177 (__v8df)
0b192937 6178 _mm512_undefined_pd (),
756c5857
AI
6179 (__mmask8) -1);
6180}
6181
6182extern __inline __m512d
6183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6184_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6185 __m256d __B, const int __imm)
6186{
6187 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6188 (__v4df) __B,
6189 __imm,
6190 (__v8df) __W,
6191 (__mmask8) __U);
6192}
6193
6194extern __inline __m512d
6195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6197 const int __imm)
6198{
6199 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6200 (__v4df) __B,
6201 __imm,
6202 (__v8df)
6203 _mm512_setzero_pd (),
6204 (__mmask8) __U);
6205}
6206#else
/* Macro form (no __OPTIMIZE__).  X is expanded exactly once: the
   pass-through operand is an undefined vector rather than a second copy
   of X, so an argument with side effects is not evaluated twice.  The
   mask is all ones, as in the sibling _mm512_insertf64x4 macro, which
   also pairs an all-ones mask with an undefined pass-through.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), \
    (__mmask16)(-1)))
6210
/* Macro form (no __OPTIMIZE__).  X is expanded exactly once: the
   pass-through operand is an undefined vector rather than a second copy
   of X, so an argument with side effects is not evaluated twice.  The
   mask is all ones, as in the sibling _mm512_inserti64x4 macro, which
   also pairs an all-ones mask with an undefined pass-through.  */
#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i) _mm512_undefined_epi32 (), \
    (__mmask16)(-1)))
6214
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                              (__v4df) (__m256d) (Y), \
                                              (int) (C), \
                                              (__v8df) (__m512d) _mm512_undefined_pd (), \
                                              (__mmask8) -1))
6220
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                              (__v4df) (__m256d) (Y), \
                                              (int) (C), \
                                              (__v8df) (__m512d) (W), \
                                              (__mmask8) (U)))
6226
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                              (__v4df) (__m256d) (Y), \
                                              (int) (C), \
                                              (__v8df) (__m512d) _mm512_setzero_pd (), \
                                              (__mmask8) (U)))
6232
/* Macro form (no __OPTIMIZE__): all-ones mask, undefined pass-through.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                              (__v4di) (__m256i) (Y), \
                                              (int) (C), \
                                              (__v8di) (__m512i) _mm512_undefined_epi32 (), \
                                              (__mmask8) -1))
6238
/* Macro form (no __OPTIMIZE__): W is the pass-through, U the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                              (__v4di) (__m256i) (Y), \
                                              (int) (C), \
                                              (__v8di) (__m512i) (W), \
                                              (__mmask8) (U)))
6244
/* Macro form (no __OPTIMIZE__): all-zero pass-through, U the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                              (__v4di) (__m256i) (Y), \
                                              (int) (C), \
                                              (__v8di) (__m512i) _mm512_setzero_si512 (), \
                                              (__mmask8) (U)))
6250#endif
6251
6252extern __inline __m512d
6253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6254_mm512_loadu_pd (void const *__P)
6255{
c6b0037d 6256 return *(__m512d_u *)__P;
756c5857
AI
6257}
6258
6259extern __inline __m512d
6260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6261_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6262{
fc9cf6da 6263 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6264 (__v8df) __W,
6265 (__mmask8) __U);
6266}
6267
6268extern __inline __m512d
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6271{
fc9cf6da 6272 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6273 (__v8df)
6274 _mm512_setzero_pd (),
6275 (__mmask8) __U);
6276}
6277
6278extern __inline void
6279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6280_mm512_storeu_pd (void *__P, __m512d __A)
6281{
c6b0037d 6282 *(__m512d_u *)__P = __A;
756c5857
AI
6283}
6284
6285extern __inline void
6286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6287_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6288{
fc9cf6da 6289 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
6290 (__mmask8) __U);
6291}
6292
6293extern __inline __m512
6294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6295_mm512_loadu_ps (void const *__P)
6296{
c6b0037d 6297 return *(__m512_u *)__P;
756c5857
AI
6298}
6299
6300extern __inline __m512
6301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6303{
fc9cf6da 6304 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6305 (__v16sf) __W,
6306 (__mmask16) __U);
6307}
6308
6309extern __inline __m512
6310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6311_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6312{
fc9cf6da 6313 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6314 (__v16sf)
6315 _mm512_setzero_ps (),
6316 (__mmask16) __U);
6317}
6318
6319extern __inline void
6320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321_mm512_storeu_ps (void *__P, __m512 __A)
6322{
c6b0037d 6323 *(__m512_u *)__P = __A;
756c5857
AI
6324}
6325
6326extern __inline void
6327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6329{
fc9cf6da 6330 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
6331 (__mmask16) __U);
6332}
6333
459d21c6
JJ
6334extern __inline __m128
6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
6337{
6338 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
6339}
6340
6341extern __inline __m128
6342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6343_mm_maskz_load_ss (__mmask8 __U, const float *__P)
6344{
6345 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
6346 __U);
6347}
6348
6349extern __inline __m128d
6350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6351_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
6352{
6353 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
6354}
6355
6356extern __inline __m128d
6357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6358_mm_maskz_load_sd (__mmask8 __U, const double *__P)
6359{
6360 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
6361 __U);
6362}
6363
6364extern __inline __m128
6365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6366_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6367{
6368 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6369 (__v4sf) __W, __U);
6370}
6371
6372extern __inline __m128
6373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
6375{
6376 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6377 (__v4sf) _mm_setzero_ps (), __U);
6378}
6379
6380extern __inline __m128d
6381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6382_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6383{
6384 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6385 (__v2df) __W, __U);
6386}
6387
6388extern __inline __m128d
6389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6390_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
6391{
6392 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6393 (__v2df) _mm_setzero_pd (),
6394 __U);
6395}
6396
6397extern __inline void
6398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
6400{
6401 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
6402}
6403
6404extern __inline void
6405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6406_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
6407{
6408 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
6409}
6410
4c98bdad
SP
6411extern __inline __m512i
6412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6413_mm512_loadu_epi64 (void const *__P)
6414{
6415 return *(__m512i_u *) __P;
6416}
6417
756c5857
AI
6418extern __inline __m512i
6419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6420_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6421{
fc9cf6da 6422 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6423 (__v8di) __W,
6424 (__mmask8) __U);
6425}
6426
6427extern __inline __m512i
6428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6429_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6430{
fc9cf6da 6431 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6432 (__v8di)
6433 _mm512_setzero_si512 (),
6434 (__mmask8) __U);
6435}
6436
4c98bdad
SP
6437extern __inline void
6438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6439_mm512_storeu_epi64 (void *__P, __m512i __A)
6440{
6441 *(__m512i_u *) __P = (__m512i_u) __A;
6442}
6443
756c5857
AI
6444extern __inline void
6445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6447{
fc9cf6da 6448 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
6449 (__mmask8) __U);
6450}
6451
6452extern __inline __m512i
6453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6454_mm512_loadu_si512 (void const *__P)
756c5857 6455{
c6b0037d 6456 return *(__m512i_u *)__P;
756c5857
AI
6457}
6458
4c98bdad
SP
6459extern __inline __m512i
6460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6461_mm512_loadu_epi32 (void const *__P)
6462{
6463 return *(__m512i_u *) __P;
6464}
6465
756c5857
AI
6466extern __inline __m512i
6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6469{
fc9cf6da 6470 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6471 (__v16si) __W,
6472 (__mmask16) __U);
6473}
6474
6475extern __inline __m512i
6476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6477_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6478{
fc9cf6da 6479 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6480 (__v16si)
6481 _mm512_setzero_si512 (),
6482 (__mmask16) __U);
6483}
6484
6485extern __inline void
6486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6487_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 6488{
c6b0037d 6489 *(__m512i_u *)__P = __A;
756c5857
AI
6490}
6491
4c98bdad
SP
6492extern __inline void
6493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494_mm512_storeu_epi32 (void *__P, __m512i __A)
6495{
6496 *(__m512i_u *) __P = (__m512i_u) __A;
6497}
6498
756c5857
AI
6499extern __inline void
6500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6501_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6502{
fc9cf6da 6503 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
6504 (__mmask16) __U);
6505}
6506
6507extern __inline __m512d
6508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6509_mm512_permutevar_pd (__m512d __A, __m512i __C)
6510{
6511 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6512 (__v8di) __C,
6513 (__v8df)
0b192937 6514 _mm512_undefined_pd (),
756c5857
AI
6515 (__mmask8) -1);
6516}
6517
6518extern __inline __m512d
6519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6521{
6522 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6523 (__v8di) __C,
6524 (__v8df) __W,
6525 (__mmask8) __U);
6526}
6527
6528extern __inline __m512d
6529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6531{
6532 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6533 (__v8di) __C,
6534 (__v8df)
6535 _mm512_setzero_pd (),
6536 (__mmask8) __U);
6537}
6538
6539extern __inline __m512
6540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6541_mm512_permutevar_ps (__m512 __A, __m512i __C)
6542{
6543 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6544 (__v16si) __C,
6545 (__v16sf)
0b192937 6546 _mm512_undefined_ps (),
756c5857
AI
6547 (__mmask16) -1);
6548}
6549
6550extern __inline __m512
6551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6552_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6553{
6554 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6555 (__v16si) __C,
6556 (__v16sf) __W,
6557 (__mmask16) __U);
6558}
6559
6560extern __inline __m512
6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6563{
6564 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6565 (__v16si) __C,
6566 (__v16sf)
6567 _mm512_setzero_ps (),
6568 (__mmask16) __U);
6569}
6570
6571extern __inline __m512i
6572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6573_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6574{
6575 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6576 /* idx */ ,
6577 (__v8di) __A,
6578 (__v8di) __B,
6579 (__mmask8) -1);
6580}
6581
6582extern __inline __m512i
6583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6584_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6585 __m512i __B)
6586{
6587 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6588 /* idx */ ,
6589 (__v8di) __A,
6590 (__v8di) __B,
6591 (__mmask8) __U);
6592}
6593
6594extern __inline __m512i
6595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6597 __mmask8 __U, __m512i __B)
6598{
6599 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6600 (__v8di) __I
6601 /* idx */ ,
6602 (__v8di) __B,
6603 (__mmask8) __U);
6604}
6605
6606extern __inline __m512i
6607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6608_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6609 __m512i __I, __m512i __B)
6610{
6611 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6612 /* idx */ ,
6613 (__v8di) __A,
6614 (__v8di) __B,
6615 (__mmask8) __U);
6616}
6617
6618extern __inline __m512i
6619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6620_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6621{
6622 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6623 /* idx */ ,
6624 (__v16si) __A,
6625 (__v16si) __B,
6626 (__mmask16) -1);
6627}
6628
6629extern __inline __m512i
6630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6631_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6632 __m512i __I, __m512i __B)
6633{
6634 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6635 /* idx */ ,
6636 (__v16si) __A,
6637 (__v16si) __B,
6638 (__mmask16) __U);
6639}
6640
6641extern __inline __m512i
6642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6644 __mmask16 __U, __m512i __B)
6645{
6646 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6647 (__v16si) __I
6648 /* idx */ ,
6649 (__v16si) __B,
6650 (__mmask16) __U);
6651}
6652
6653extern __inline __m512i
6654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6655_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6656 __m512i __I, __m512i __B)
6657{
6658 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6659 /* idx */ ,
6660 (__v16si) __A,
6661 (__v16si) __B,
6662 (__mmask16) __U);
6663}
6664
6665extern __inline __m512d
6666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6668{
6669 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6670 /* idx */ ,
6671 (__v8df) __A,
6672 (__v8df) __B,
6673 (__mmask8) -1);
6674}
6675
6676extern __inline __m512d
6677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6678_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6679 __m512d __B)
6680{
6681 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6682 /* idx */ ,
6683 (__v8df) __A,
6684 (__v8df) __B,
6685 (__mmask8) __U);
6686}
6687
6688extern __inline __m512d
6689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6690_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6691 __m512d __B)
6692{
6693 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6694 (__v8di) __I
6695 /* idx */ ,
6696 (__v8df) __B,
6697 (__mmask8) __U);
6698}
6699
6700extern __inline __m512d
6701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6703 __m512d __B)
6704{
6705 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6706 /* idx */ ,
6707 (__v8df) __A,
6708 (__v8df) __B,
6709 (__mmask8) __U);
6710}
6711
6712extern __inline __m512
6713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6714_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6715{
6716 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6717 /* idx */ ,
6718 (__v16sf) __A,
6719 (__v16sf) __B,
6720 (__mmask16) -1);
6721}
6722
6723extern __inline __m512
6724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6725_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6726{
6727 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6728 /* idx */ ,
6729 (__v16sf) __A,
6730 (__v16sf) __B,
6731 (__mmask16) __U);
6732}
6733
6734extern __inline __m512
6735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6736_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6737 __m512 __B)
6738{
6739 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6740 (__v16si) __I
6741 /* idx */ ,
6742 (__v16sf) __B,
6743 (__mmask16) __U);
6744}
6745
6746extern __inline __m512
6747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6748_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6749 __m512 __B)
6750{
6751 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6752 /* idx */ ,
6753 (__v16sf) __A,
6754 (__v16sf) __B,
6755 (__mmask16) __U);
6756}
6757
6758#ifdef __OPTIMIZE__
6759extern __inline __m512d
6760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6761_mm512_permute_pd (__m512d __X, const int __C)
6762{
6763 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6764 (__v8df)
0b192937 6765 _mm512_undefined_pd (),
756c5857
AI
6766 (__mmask8) -1);
6767}
6768
6769extern __inline __m512d
6770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6772{
6773 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6774 (__v8df) __W,
6775 (__mmask8) __U);
6776}
6777
6778extern __inline __m512d
6779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6780_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6781{
6782 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6783 (__v8df)
6784 _mm512_setzero_pd (),
6785 (__mmask8) __U);
6786}
6787
6788extern __inline __m512
6789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790_mm512_permute_ps (__m512 __X, const int __C)
6791{
6792 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6793 (__v16sf)
0b192937 6794 _mm512_undefined_ps (),
756c5857
AI
6795 (__mmask16) -1);
6796}
6797
6798extern __inline __m512
6799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6800_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6801{
6802 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6803 (__v16sf) __W,
6804 (__mmask16) __U);
6805}
6806
6807extern __inline __m512
6808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6810{
6811 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6812 (__v16sf)
6813 _mm512_setzero_ps (),
6814 (__mmask16) __U);
6815}
6816#else
/* Macro forms of the permute_pd/permute_ps intrinsics.  Each expands to
   a single call of the vpermilpd512/vpermilps512 mask builtin with every
   macro argument parenthesized and cast.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
6846#endif
6847
6848#ifdef __OPTIMIZE__
6849extern __inline __m512i
6850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851_mm512_permutex_epi64 (__m512i __X, const int __I)
6852{
6853 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6854 (__v8di)
4271e5cb 6855 _mm512_undefined_epi32 (),
756c5857
AI
6856 (__mmask8) (-1));
6857}
6858
6859extern __inline __m512i
6860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6862 __m512i __X, const int __I)
6863{
6864 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6865 (__v8di) __W,
6866 (__mmask8) __M);
6867}
6868
6869extern __inline __m512i
6870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6871_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6872{
6873 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6874 (__v8di)
6875 _mm512_setzero_si512 (),
6876 (__mmask8) __M);
6877}
6878
6879extern __inline __m512d
6880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6881_mm512_permutex_pd (__m512d __X, const int __M)
6882{
6883 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6884 (__v8df)
0b192937 6885 _mm512_undefined_pd (),
756c5857
AI
6886 (__mmask8) -1);
6887}
6888
6889extern __inline __m512d
6890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6891_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6892{
6893 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6894 (__v8df) __W,
6895 (__mmask8) __U);
6896}
6897
6898extern __inline __m512d
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6901{
6902 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6903 (__v8df)
6904 _mm512_setzero_pd (),
6905 (__mmask8) __U);
6906}
6907#else
/* Macro forms of the permutex_pd/permutex_epi64 intrinsics.  Each expands
   to a single call of the permdf512/permdi512 mask builtin with every
   macro argument parenthesized and cast.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) (_mm512_undefined_epi32 ()), \
    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(M)))
6941#endif
6942
6943extern __inline __m512i
6944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6945_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6946{
583a9919
KY
6947 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6948 (__v8di) __X,
756c5857
AI
6949 (__v8di)
6950 _mm512_setzero_si512 (),
6951 __M);
6952}
6953
6954extern __inline __m512i
6955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6956_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6957{
583a9919
KY
6958 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6959 (__v8di) __X,
756c5857 6960 (__v8di)
4271e5cb 6961 _mm512_undefined_epi32 (),
756c5857
AI
6962 (__mmask8) -1);
6963}
6964
6965extern __inline __m512i
6966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6968 __m512i __Y)
6969{
583a9919
KY
6970 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6971 (__v8di) __X,
756c5857
AI
6972 (__v8di) __W,
6973 __M);
6974}
6975
6976extern __inline __m512i
6977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6978_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6979{
583a9919
KY
6980 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6981 (__v16si) __X,
756c5857
AI
6982 (__v16si)
6983 _mm512_setzero_si512 (),
6984 __M);
6985}
6986
6987extern __inline __m512i
6988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6989_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6990{
583a9919
KY
6991 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6992 (__v16si) __X,
756c5857 6993 (__v16si)
4271e5cb 6994 _mm512_undefined_epi32 (),
756c5857
AI
6995 (__mmask16) -1);
6996}
6997
6998extern __inline __m512i
6999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7000_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
7001 __m512i __Y)
7002{
583a9919
KY
7003 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7004 (__v16si) __X,
756c5857
AI
7005 (__v16si) __W,
7006 __M);
7007}
7008
7009extern __inline __m512d
7010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7011_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7012{
7013 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7014 (__v8di) __X,
7015 (__v8df)
0b192937 7016 _mm512_undefined_pd (),
756c5857
AI
7017 (__mmask8) -1);
7018}
7019
7020extern __inline __m512d
7021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7022_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7023{
7024 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7025 (__v8di) __X,
7026 (__v8df) __W,
7027 (__mmask8) __U);
7028}
7029
7030extern __inline __m512d
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7033{
7034 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7035 (__v8di) __X,
7036 (__v8df)
7037 _mm512_setzero_pd (),
7038 (__mmask8) __U);
7039}
7040
7041extern __inline __m512
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
7044{
7045 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7046 (__v16si) __X,
7047 (__v16sf)
0b192937 7048 _mm512_undefined_ps (),
756c5857
AI
7049 (__mmask16) -1);
7050}
7051
7052extern __inline __m512
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
7055{
7056 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7057 (__v16si) __X,
7058 (__v16sf) __W,
7059 (__mmask16) __U);
7060}
7061
7062extern __inline __m512
7063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7064_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
7065{
7066 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7067 (__v16si) __X,
7068 (__v16sf)
7069 _mm512_setzero_ps (),
7070 (__mmask16) __U);
7071}
7072
7073#ifdef __OPTIMIZE__
7074extern __inline __m512
7075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
7077{
7078 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7079 (__v16sf) __V, __imm,
7080 (__v16sf)
0b192937 7081 _mm512_undefined_ps (),
756c5857
AI
7082 (__mmask16) -1);
7083}
7084
7085extern __inline __m512
7086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
7088 __m512 __V, const int __imm)
7089{
7090 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7091 (__v16sf) __V, __imm,
7092 (__v16sf) __W,
7093 (__mmask16) __U);
7094}
7095
7096extern __inline __m512
7097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
7099{
7100 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7101 (__v16sf) __V, __imm,
7102 (__v16sf)
7103 _mm512_setzero_ps (),
7104 (__mmask16) __U);
7105}
7106
7107extern __inline __m512d
7108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
7110{
7111 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7112 (__v8df) __V, __imm,
7113 (__v8df)
0b192937 7114 _mm512_undefined_pd (),
756c5857
AI
7115 (__mmask8) -1);
7116}
7117
7118extern __inline __m512d
7119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
7121 __m512d __V, const int __imm)
7122{
7123 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7124 (__v8df) __V, __imm,
7125 (__v8df) __W,
7126 (__mmask8) __U);
7127}
7128
7129extern __inline __m512d
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
7132 const int __imm)
7133{
7134 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7135 (__v8df) __V, __imm,
7136 (__v8df)
7137 _mm512_setzero_pd (),
7138 (__mmask8) __U);
7139}
7140
7141extern __inline __m512d
7142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7143_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
756c5857
AI
7144 const int __imm, const int __R)
7145{
040d2bba
WX
7146 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
7147 (__v8df) __B,
7148 (__v8di) __C,
756c5857 7149 __imm,
040d2bba 7150 (__mmask8) -1, __R);
756c5857
AI
7151}
7152
7153extern __inline __m512d
7154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7155_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
7156 __m512i __C, const int __imm, const int __R)
756c5857
AI
7157{
7158 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
7159 (__v8df) __B,
7160 (__v8di) __C,
756c5857
AI
7161 __imm,
7162 (__mmask8) __U, __R);
7163}
7164
7165extern __inline __m512d
7166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7167_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7168 __m512i __C, const int __imm, const int __R)
756c5857
AI
7169{
7170 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
7171 (__v8df) __B,
7172 (__v8di) __C,
756c5857
AI
7173 __imm,
7174 (__mmask8) __U, __R);
7175}
7176
7177extern __inline __m512
7178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7179_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
756c5857
AI
7180 const int __imm, const int __R)
7181{
040d2bba
WX
7182 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7183 (__v16sf) __B,
7184 (__v16si) __C,
756c5857 7185 __imm,
040d2bba 7186 (__mmask16) -1, __R);
756c5857
AI
7187}
7188
7189extern __inline __m512
7190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7191_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7192 __m512i __C, const int __imm, const int __R)
756c5857
AI
7193{
7194 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
7195 (__v16sf) __B,
7196 (__v16si) __C,
756c5857
AI
7197 __imm,
7198 (__mmask16) __U, __R);
7199}
7200
7201extern __inline __m512
7202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7203_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7204 __m512i __C, const int __imm, const int __R)
756c5857
AI
7205{
7206 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
7207 (__v16sf) __B,
7208 (__v16si) __C,
756c5857
AI
7209 __imm,
7210 (__mmask16) __U, __R);
7211}
7212
7213extern __inline __m128d
7214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7215_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
756c5857
AI
7216 const int __imm, const int __R)
7217{
040d2bba
WX
7218 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7219 (__v2df) __B,
7220 (__v2di) __C, __imm,
7221 (__mmask8) -1, __R);
756c5857
AI
7222}
7223
7224extern __inline __m128d
7225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7226_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7227 __m128i __C, const int __imm, const int __R)
756c5857
AI
7228{
7229 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
7230 (__v2df) __B,
7231 (__v2di) __C, __imm,
756c5857
AI
7232 (__mmask8) __U, __R);
7233}
7234
7235extern __inline __m128d
7236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7237_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7238 __m128i __C, const int __imm, const int __R)
756c5857
AI
7239{
7240 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
7241 (__v2df) __B,
7242 (__v2di) __C,
756c5857
AI
7243 __imm,
7244 (__mmask8) __U, __R);
7245}
7246
7247extern __inline __m128
7248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7249_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
756c5857
AI
7250 const int __imm, const int __R)
7251{
040d2bba
WX
7252 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7253 (__v4sf) __B,
7254 (__v4si) __C, __imm,
7255 (__mmask8) -1, __R);
756c5857
AI
7256}
7257
7258extern __inline __m128
7259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7260_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7261 __m128i __C, const int __imm, const int __R)
756c5857
AI
7262{
7263 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
7264 (__v4sf) __B,
7265 (__v4si) __C, __imm,
756c5857
AI
7266 (__mmask8) __U, __R);
7267}
7268
7269extern __inline __m128
7270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7271_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7272 __m128i __C, const int __imm, const int __R)
756c5857
AI
7273{
7274 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
7275 (__v4sf) __B,
7276 (__v4si) __C, __imm,
756c5857
AI
7277 (__mmask8) __U, __R);
7278}
7279
7280#else
/* Macro forms of the shuffle_pd/shuffle_ps intrinsics.  Each expands to a
   single call of the shufpd512/shufps512 mask builtin with every macro
   argument parenthesized and cast.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)-1))

#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))

/* Macro forms of the fixupimm_round intrinsics.  The builtin always
   receives the three vector arguments, (int)(C), the mask and (R), in
   that order; the unmasked variants pass -1 as the mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(-1), (R)))

#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))

#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))

#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(-1), (R)))

#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), (R)))

#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), (R)))

#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), (R)))
7376#endif
7377
7378extern __inline __m512
7379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7380_mm512_movehdup_ps (__m512 __A)
7381{
7382 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7383 (__v16sf)
0b192937 7384 _mm512_undefined_ps (),
756c5857
AI
7385 (__mmask16) -1);
7386}
7387
7388extern __inline __m512
7389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7391{
7392 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7393 (__v16sf) __W,
7394 (__mmask16) __U);
7395}
7396
7397extern __inline __m512
7398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7399_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7400{
7401 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7402 (__v16sf)
7403 _mm512_setzero_ps (),
7404 (__mmask16) __U);
7405}
7406
7407extern __inline __m512
7408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7409_mm512_moveldup_ps (__m512 __A)
7410{
7411 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7412 (__v16sf)
0b192937 7413 _mm512_undefined_ps (),
756c5857
AI
7414 (__mmask16) -1);
7415}
7416
7417extern __inline __m512
7418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7420{
7421 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7422 (__v16sf) __W,
7423 (__mmask16) __U);
7424}
7425
7426extern __inline __m512
7427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7428_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7429{
7430 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7431 (__v16sf)
7432 _mm512_setzero_ps (),
7433 (__mmask16) __U);
7434}
7435
7436extern __inline __m512i
7437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7438_mm512_or_si512 (__m512i __A, __m512i __B)
7439{
2069d6fc 7440 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7441}
7442
7443extern __inline __m512i
7444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7445_mm512_or_epi32 (__m512i __A, __m512i __B)
7446{
2069d6fc 7447 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7448}
7449
7450extern __inline __m512i
7451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7452_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7453{
7454 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7455 (__v16si) __B,
7456 (__v16si) __W,
7457 (__mmask16) __U);
7458}
7459
7460extern __inline __m512i
7461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7462_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7463{
7464 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7465 (__v16si) __B,
7466 (__v16si)
7467 _mm512_setzero_si512 (),
7468 (__mmask16) __U);
7469}
7470
7471extern __inline __m512i
7472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7473_mm512_or_epi64 (__m512i __A, __m512i __B)
7474{
2069d6fc 7475 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
7476}
7477
7478extern __inline __m512i
7479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7481{
7482 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7483 (__v8di) __B,
7484 (__v8di) __W,
7485 (__mmask8) __U);
7486}
7487
7488extern __inline __m512i
7489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7491{
7492 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7493 (__v8di) __B,
7494 (__v8di)
7495 _mm512_setzero_si512 (),
7496 (__mmask8) __U);
7497}
7498
7499extern __inline __m512i
7500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501_mm512_xor_si512 (__m512i __A, __m512i __B)
7502{
2069d6fc 7503 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7504}
7505
7506extern __inline __m512i
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm512_xor_epi32 (__m512i __A, __m512i __B)
7509{
2069d6fc 7510 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7511}
7512
7513extern __inline __m512i
7514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7515_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7516{
7517 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7518 (__v16si) __B,
7519 (__v16si) __W,
7520 (__mmask16) __U);
7521}
7522
7523extern __inline __m512i
7524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7526{
7527 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7528 (__v16si) __B,
7529 (__v16si)
7530 _mm512_setzero_si512 (),
7531 (__mmask16) __U);
7532}
7533
7534extern __inline __m512i
7535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7536_mm512_xor_epi64 (__m512i __A, __m512i __B)
7537{
2069d6fc 7538 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
7539}
7540
7541extern __inline __m512i
7542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7543_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7544{
7545 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7546 (__v8di) __B,
7547 (__v8di) __W,
7548 (__mmask8) __U);
7549}
7550
7551extern __inline __m512i
7552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7553_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7554{
7555 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7556 (__v8di) __B,
7557 (__v8di)
7558 _mm512_setzero_si512 (),
7559 (__mmask8) __U);
7560}
7561
/* 512-bit rotate wrappers (prold/prord/prolq/prorq builtins).

   Each intrinsic exists in two forms: an always-inline function when
   __OPTIMIZE__ is defined, and a macro otherwise.  Both forward the
   count __B / (B) unchanged to the builtin.
   NOTE(review): the split presumably exists because the builtins need
   a compile-time constant count, which a function parameter only
   provides once the wrapper is inlined and optimized -- confirm.

   For every operation there are three variants:
     plain  - third operand from _mm512_undefined_epi32 (), mask -1;
     mask   - third operand __W, mask __U;
     maskz  - third operand from _mm512_setzero_si512 (), mask __U.

   The count parameter is declared `const int' uniformly; the ror
   functions previously used plain `int', unlike their rol siblings.
   Top-level const on a by-value parameter does not alter the function
   type, so callers are unaffected.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi64 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi64 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

#else
#define _mm512_rol_epi32(A, B)                                            \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_undefined_epi32 (), \
                                            (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)(__m512i)(W),        \
                                            (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#define _mm512_ror_epi32(A, B)                                            \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_undefined_epi32 (), \
                                            (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)(__m512i)(W),        \
                                            (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#define _mm512_rol_epi64(A, B)                                            \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_undefined_epi32 (), \
                                            (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)(__m512i)(W),         \
                                            (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_setzero_si512 (), \
                                            (__mmask8)(U)))

#define _mm512_ror_epi64(A, B)                                            \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_undefined_epi32 (), \
                                            (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)(__m512i)(W),         \
                                            (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
#endif
7742
7743extern __inline __m512i
7744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7745_mm512_and_si512 (__m512i __A, __m512i __B)
7746{
2069d6fc 7747 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7748}
7749
7750extern __inline __m512i
7751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752_mm512_and_epi32 (__m512i __A, __m512i __B)
7753{
2069d6fc 7754 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7755}
7756
7757extern __inline __m512i
7758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7760{
7761 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7762 (__v16si) __B,
7763 (__v16si) __W,
7764 (__mmask16) __U);
7765}
7766
7767extern __inline __m512i
7768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7770{
7771 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7772 (__v16si) __B,
7773 (__v16si)
7774 _mm512_setzero_si512 (),
7775 (__mmask16) __U);
7776}
7777
7778extern __inline __m512i
7779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7780_mm512_and_epi64 (__m512i __A, __m512i __B)
7781{
2069d6fc 7782 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7783}
7784
7785extern __inline __m512i
7786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7788{
7789 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7790 (__v8di) __B,
7791 (__v8di) __W, __U);
7792}
7793
7794extern __inline __m512i
7795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7796_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7797{
7798 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7799 (__v8di) __B,
7800 (__v8di)
7801 _mm512_setzero_pd (),
7802 __U);
7803}
7804
7805extern __inline __m512i
7806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7807_mm512_andnot_si512 (__m512i __A, __m512i __B)
7808{
7809 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7810 (__v16si) __B,
7811 (__v16si)
4271e5cb 7812 _mm512_undefined_epi32 (),
756c5857
AI
7813 (__mmask16) -1);
7814}
7815
7816extern __inline __m512i
7817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7818_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7819{
7820 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7821 (__v16si) __B,
7822 (__v16si)
4271e5cb 7823 _mm512_undefined_epi32 (),
756c5857
AI
7824 (__mmask16) -1);
7825}
7826
7827extern __inline __m512i
7828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7829_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7830{
7831 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7832 (__v16si) __B,
7833 (__v16si) __W,
7834 (__mmask16) __U);
7835}
7836
7837extern __inline __m512i
7838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7839_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7840{
7841 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7842 (__v16si) __B,
7843 (__v16si)
7844 _mm512_setzero_si512 (),
7845 (__mmask16) __U);
7846}
7847
7848extern __inline __m512i
7849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7850_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7851{
7852 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7853 (__v8di) __B,
7854 (__v8di)
4271e5cb 7855 _mm512_undefined_epi32 (),
756c5857
AI
7856 (__mmask8) -1);
7857}
7858
7859extern __inline __m512i
7860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7861_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7862{
7863 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7864 (__v8di) __B,
7865 (__v8di) __W, __U);
7866}
7867
7868extern __inline __m512i
7869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7871{
7872 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7873 (__v8di) __B,
7874 (__v8di)
7875 _mm512_setzero_pd (),
7876 __U);
7877}
7878
7879extern __inline __mmask16
7880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7881_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7882{
7883 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7884 (__v16si) __B,
7885 (__mmask16) -1);
7886}
7887
7888extern __inline __mmask16
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7891{
7892 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7893 (__v16si) __B, __U);
7894}
7895
7896extern __inline __mmask8
7897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7898_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7899{
7900 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7901 (__v8di) __B,
7902 (__mmask8) -1);
7903}
7904
7905extern __inline __mmask8
7906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7907_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7908{
7909 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7910}
7911
260d3642
IT
7912extern __inline __mmask16
7913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7914_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7915{
7916 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7917 (__v16si) __B,
7918 (__mmask16) -1);
7919}
7920
7921extern __inline __mmask16
7922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7923_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7924{
7925 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7926 (__v16si) __B, __U);
7927}
7928
7929extern __inline __mmask8
7930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7931_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7932{
7933 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7934 (__v8di) __B,
7935 (__mmask8) -1);
7936}
7937
7938extern __inline __mmask8
7939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7940_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7941{
7942 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7943 (__v8di) __B, __U);
7944}
7945
dcb2c527
JJ
7946extern __inline __m512
7947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7948_mm512_abs_ps (__m512 __A)
7949{
7950 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7951 _mm512_set1_epi32 (0x7fffffff));
7952}
7953
7954extern __inline __m512
7955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7956_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7957{
7958 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7959 _mm512_set1_epi32 (0x7fffffff));
7960}
7961
7962extern __inline __m512d
7963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7964_mm512_abs_pd (__m512d __A)
dcb2c527
JJ
7965{
7966 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7967 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7968}
7969
7970extern __inline __m512d
7971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7972_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
dcb2c527
JJ
7973{
7974 return (__m512d)
7975 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7976 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7977}
7978
756c5857
AI
7979extern __inline __m512i
7980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7981_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7982{
7983 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7984 (__v16si) __B,
7985 (__v16si)
4271e5cb 7986 _mm512_undefined_epi32 (),
756c5857
AI
7987 (__mmask16) -1);
7988}
7989
7990extern __inline __m512i
7991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7992_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7993 __m512i __B)
7994{
7995 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7996 (__v16si) __B,
7997 (__v16si) __W,
7998 (__mmask16) __U);
7999}
8000
8001extern __inline __m512i
8002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8003_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
8004{
8005 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
8006 (__v16si) __B,
8007 (__v16si)
8008 _mm512_setzero_si512 (),
8009 (__mmask16) __U);
8010}
8011
8012extern __inline __m512i
8013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
8015{
8016 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8017 (__v8di) __B,
8018 (__v8di)
4271e5cb 8019 _mm512_undefined_epi32 (),
756c5857
AI
8020 (__mmask8) -1);
8021}
8022
8023extern __inline __m512i
8024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8025_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8026{
8027 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8028 (__v8di) __B,
8029 (__v8di) __W,
8030 (__mmask8) __U);
8031}
8032
8033extern __inline __m512i
8034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8035_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8036{
8037 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8038 (__v8di) __B,
8039 (__v8di)
8040 _mm512_setzero_si512 (),
8041 (__mmask8) __U);
8042}
8043
8044extern __inline __m512i
8045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8046_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
8047{
8048 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8049 (__v16si) __B,
8050 (__v16si)
4271e5cb 8051 _mm512_undefined_epi32 (),
756c5857
AI
8052 (__mmask16) -1);
8053}
8054
8055extern __inline __m512i
8056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8057_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8058 __m512i __B)
8059{
8060 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8061 (__v16si) __B,
8062 (__v16si) __W,
8063 (__mmask16) __U);
8064}
8065
8066extern __inline __m512i
8067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
8069{
8070 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8071 (__v16si) __B,
8072 (__v16si)
8073 _mm512_setzero_si512 (),
8074 (__mmask16) __U);
8075}
8076
8077extern __inline __m512i
8078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8079_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
8080{
8081 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8082 (__v8di) __B,
8083 (__v8di)
4271e5cb 8084 _mm512_undefined_epi32 (),
756c5857
AI
8085 (__mmask8) -1);
8086}
8087
8088extern __inline __m512i
8089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8090_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8091{
8092 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8093 (__v8di) __B,
8094 (__v8di) __W,
8095 (__mmask8) __U);
8096}
8097
8098extern __inline __m512i
8099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8100_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8101{
8102 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8103 (__v8di) __B,
8104 (__v8di)
8105 _mm512_setzero_si512 (),
8106 (__mmask8) __U);
8107}
8108
/* Scalar single-precision to 64-bit integer conversions; only provided
   when __x86_64__ is defined.  Each wrapper forwards the vector and the
   second argument (__R / B) to a vcvt[t]ss2[u]si64 builtin.
   NOTE(review): __R is presumably a rounding/exception-control
   immediate -- confirm against the builtin's definition.

   The _si64 and _i64 spellings call the same builtin.

   The macro fallbacks now cast the vector argument to __v4sf and the
   second argument to int, with both parenthesized, so they agree with
   the inline forms and with the casting style of the other fallback
   macros in this header.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}
#else
#define _mm_cvt_roundss_u64(A, B)                                         \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64 ((__v4sf)(__m128)(A), \
                                                      (int)(B)))

#define _mm_cvt_roundss_si64(A, B)                                        \
    ((long long)__builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i64(A, B)                                         \
    ((long long)__builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u64(A, B)                                        \
    ((unsigned long long)__builtin_ia32_vcvttss2usi64 ((__v4sf)(__m128)(A), \
                                                       (int)(B)))

#define _mm_cvtt_roundss_i64(A, B)                                        \
    ((long long)__builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si64(A, B)                                       \
    ((long long)__builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))
#endif
#endif
8172
/* Scalar single-precision to 32-bit integer conversions.  Each wrapper
   forwards the vector and the second argument (__R / B) to a
   vcvt[t]ss2[u]si32 builtin.
   NOTE(review): __R is presumably a rounding/exception-control
   immediate -- confirm against the builtin's definition.

   The _si32 and _i32 spellings call the same builtin.

   The macro fallbacks now cast the vector argument to __v4sf and the
   second argument to int, with both parenthesized, so they agree with
   the inline forms and with the casting style of the other fallback
   macros in this header.  */
#ifdef __OPTIMIZE__
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}
#else
#define _mm_cvt_roundss_u32(A, B)                                         \
    ((unsigned)__builtin_ia32_vcvtss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si32(A, B)                                        \
    ((int)__builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i32(A, B)                                         \
    ((int)__builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u32(A, B)                                        \
    ((unsigned)__builtin_ia32_vcvttss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si32(A, B)                                       \
    ((int)__builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i32(A, B)                                        \
    ((int)__builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))
#endif
8234
/* Scalar double-precision to 64-bit integer conversions; only provided
   when __x86_64__ is defined.  Each wrapper forwards the vector and the
   second argument (__R / B) to a vcvt[t]sd2[u]si64 builtin.
   NOTE(review): __R is presumably a rounding/exception-control
   immediate -- confirm against the builtin's definition.

   The _si64 and _i64 spellings call the same builtin.

   The macro fallbacks now cast the vector argument to __v2df and the
   second argument to int, with both parenthesized, so they agree with
   the inline forms and with the casting style of the other fallback
   macros in this header.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u64(A, B)                                         \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64 ((__v2df)(__m128d)(A), \
                                                      (int)(B)))

#define _mm_cvt_roundsd_si64(A, B)                                        \
    ((long long)__builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_i64(A, B)                                         \
    ((long long)__builtin_ia32_vcvtsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_u64(A, B)                                        \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64 ((__v2df)(__m128d)(A), \
                                                       (int)(B)))

#define _mm_cvtt_roundsd_si64(A, B)                                       \
    ((long long)__builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_i64(A, B)                                        \
    ((long long)__builtin_ia32_vcvttsd2si64 ((__v2df)(__m128d)(A), (int)(B)))
#endif
#endif
8298
/* Scalar double-precision to 32-bit integer conversions.  Each wrapper
   forwards the vector and the second argument (__R / B) to a
   vcvt[t]sd2[u]si32 builtin.
   NOTE(review): __R is presumably a rounding/exception-control
   immediate -- confirm against the builtin's definition.

   The _si32 and _i32 spellings call the same builtin.

   The macro fallbacks now cast the vector argument to __v2df and the
   second argument to int, with both parenthesized, so they agree with
   the inline forms and with the casting style of the other fallback
   macros in this header.  */
#ifdef __OPTIMIZE__
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u32(A, B)                                         \
    ((unsigned)__builtin_ia32_vcvtsd2usi32 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_si32(A, B)                                        \
    ((int)__builtin_ia32_vcvtsd2si32 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvt_roundsd_i32(A, B)                                         \
    ((int)__builtin_ia32_vcvtsd2si32 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_u32(A, B)                                        \
    ((unsigned)__builtin_ia32_vcvttsd2usi32 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_si32(A, B)                                       \
    ((int)__builtin_ia32_vcvttsd2si32 ((__v2df)(__m128d)(A), (int)(B)))

#define _mm_cvtt_roundsd_i32(A, B)                                        \
    ((int)__builtin_ia32_vcvttsd2si32 ((__v2df)(__m128d)(A), (int)(B)))
#endif
8360
8361extern __inline __m512d
8362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8363_mm512_movedup_pd (__m512d __A)
8364{
8365 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8366 (__v8df)
0b192937 8367 _mm512_undefined_pd (),
756c5857
AI
8368 (__mmask8) -1);
8369}
8370
8371extern __inline __m512d
8372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8374{
8375 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8376 (__v8df) __W,
8377 (__mmask8) __U);
8378}
8379
8380extern __inline __m512d
8381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8383{
8384 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8385 (__v8df)
8386 _mm512_setzero_pd (),
8387 (__mmask8) __U);
8388}
8389
8390extern __inline __m512d
8391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8392_mm512_unpacklo_pd (__m512d __A, __m512d __B)
8393{
8394 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8395 (__v8df) __B,
8396 (__v8df)
0b192937 8397 _mm512_undefined_pd (),
756c5857
AI
8398 (__mmask8) -1);
8399}
8400
8401extern __inline __m512d
8402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8403_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8404{
8405 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8406 (__v8df) __B,
8407 (__v8df) __W,
8408 (__mmask8) __U);
8409}
8410
8411extern __inline __m512d
8412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8413_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8414{
8415 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8416 (__v8df) __B,
8417 (__v8df)
8418 _mm512_setzero_pd (),
8419 (__mmask8) __U);
8420}
8421
8422extern __inline __m512d
8423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424_mm512_unpackhi_pd (__m512d __A, __m512d __B)
8425{
8426 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8427 (__v8df) __B,
8428 (__v8df)
0b192937 8429 _mm512_undefined_pd (),
756c5857
AI
8430 (__mmask8) -1);
8431}
8432
8433extern __inline __m512d
8434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8435_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8436{
8437 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8438 (__v8df) __B,
8439 (__v8df) __W,
8440 (__mmask8) __U);
8441}
8442
8443extern __inline __m512d
8444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8446{
8447 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8448 (__v8df) __B,
8449 (__v8df)
8450 _mm512_setzero_pd (),
8451 (__mmask8) __U);
8452}
8453
8454extern __inline __m512
8455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8456_mm512_unpackhi_ps (__m512 __A, __m512 __B)
8457{
8458 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8459 (__v16sf) __B,
8460 (__v16sf)
0b192937 8461 _mm512_undefined_ps (),
756c5857
AI
8462 (__mmask16) -1);
8463}
8464
8465extern __inline __m512
8466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8467_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8468{
8469 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8470 (__v16sf) __B,
8471 (__v16sf) __W,
8472 (__mmask16) __U);
8473}
8474
8475extern __inline __m512
8476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8477_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8478{
8479 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8480 (__v16sf) __B,
8481 (__v16sf)
8482 _mm512_setzero_ps (),
8483 (__mmask16) __U);
8484}
8485
8486#ifdef __OPTIMIZE__
8487extern __inline __m512d
8488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8489_mm512_cvt_roundps_pd (__m256 __A, const int __R)
8490{
8491 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8492 (__v8df)
0b192937 8493 _mm512_undefined_pd (),
756c5857
AI
8494 (__mmask8) -1, __R);
8495}
8496
8497extern __inline __m512d
8498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8500 const int __R)
8501{
8502 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8503 (__v8df) __W,
8504 (__mmask8) __U, __R);
8505}
8506
8507extern __inline __m512d
8508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8509_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8510{
8511 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8512 (__v8df)
8513 _mm512_setzero_pd (),
8514 (__mmask8) __U, __R);
8515}
8516
8517extern __inline __m512
8518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8519_mm512_cvt_roundph_ps (__m256i __A, const int __R)
8520{
8521 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8522 (__v16sf)
0b192937 8523 _mm512_undefined_ps (),
756c5857
AI
8524 (__mmask16) -1, __R);
8525}
8526
8527extern __inline __m512
8528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8529_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8530 const int __R)
8531{
8532 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8533 (__v16sf) __W,
8534 (__mmask16) __U, __R);
8535}
8536
8537extern __inline __m512
8538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8539_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8540{
8541 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8542 (__v16sf)
8543 _mm512_setzero_ps (),
8544 (__mmask16) __U, __R);
8545}
8546
8547extern __inline __m256i
8548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8549_mm512_cvt_roundps_ph (__m512 __A, const int __I)
8550{
8551 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8552 __I,
8553 (__v16hi)
0b192937 8554 _mm256_undefined_si256 (),
756c5857
AI
8555 -1);
8556}
8557
8558extern __inline __m256i
8559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560_mm512_cvtps_ph (__m512 __A, const int __I)
8561{
8562 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8563 __I,
8564 (__v16hi)
0b192937 8565 _mm256_undefined_si256 (),
756c5857
AI
8566 -1);
8567}
8568
8569extern __inline __m256i
8570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8571_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8572 const int __I)
8573{
8574 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8575 __I,
8576 (__v16hi) __U,
8577 (__mmask16) __W);
8578}
8579
8580extern __inline __m256i
8581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8583{
8584 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8585 __I,
8586 (__v16hi) __U,
8587 (__mmask16) __W);
8588}
8589
8590extern __inline __m256i
8591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8593{
8594 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8595 __I,
8596 (__v16hi)
8597 _mm256_setzero_si256 (),
8598 (__mmask16) __W);
8599}
8600
8601extern __inline __m256i
8602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8603_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8604{
8605 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8606 __I,
8607 (__v16hi)
8608 _mm256_setzero_si256 (),
8609 (__mmask16) __W);
8610}
8611#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Every expansion is
   wrapped in an outer pair of parentheses: without it the leading cast is
   a bare unary operator, so a postfix operator written after the macro
   call (subscript, member access) would bind to the builtin call instead
   of to the cast result.  Arguments are parenthesized as well.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_setzero_ps (), (U), (B)))
8629
/* Macro forms of the float -> 16-bit conversions, used when __OPTIMIZE__
   is not defined.  Each cvtps_ph name expands to exactly the same builtin
   call as its cvt_roundps_ph counterpart, so it is written as an alias.
   In the mask forms U is the merge vector and W is the mask.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_undefined_si256 (), \
                                               -1))
#define _mm512_cvtps_ph(A, I) \
  _mm512_cvt_roundps_ph (A, I)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)(__m256i)(U), \
                                               (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  _mm512_mask_cvt_roundps_ph (U, W, A, I)

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_setzero_si256 (), \
                                               (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  _mm512_maskz_cvt_roundps_ph (W, A, I)
8648#endif
8649
8650#ifdef __OPTIMIZE__
8651extern __inline __m256
8652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8653_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8654{
8655 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8656 (__v8sf)
0b192937 8657 _mm256_undefined_ps (),
756c5857
AI
8658 (__mmask8) -1, __R);
8659}
8660
8661extern __inline __m256
8662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8664 const int __R)
8665{
8666 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8667 (__v8sf) __W,
8668 (__mmask8) __U, __R);
8669}
8670
8671extern __inline __m256
8672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8674{
8675 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8676 (__v8sf)
8677 _mm256_setzero_ps (),
8678 (__mmask8) __U, __R);
8679}
8680
075691af
AI
8681extern __inline __m128
8682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8683_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8684{
8685 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8686 (__v2df) __B,
8687 __R);
8688}
8689
93103603
SP
8690extern __inline __m128
8691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
8693 __m128d __B, const int __R)
8694{
8695 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
8696 (__v2df) __B,
8697 (__v4sf) __W,
8698 __U,
8699 __R);
8700}
8701
8702extern __inline __m128
8703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
8705 __m128d __B, const int __R)
8706{
8707 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
8708 (__v2df) __B,
8709 _mm_setzero_ps (),
8710 __U,
8711 __R);
8712}
8713
075691af
AI
8714extern __inline __m128d
8715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8716_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8717{
8718 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8719 (__v4sf) __B,
8720 __R);
8721}
93103603
SP
8722
8723extern __inline __m128d
8724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
8726 __m128 __B, const int __R)
8727{
8728 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
8729 (__v4sf) __B,
8730 (__v2df) __W,
8731 __U,
8732 __R);
8733}
8734
8735extern __inline __m128d
8736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8737_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
8738 __m128 __B, const int __R)
8739{
8740 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
8741 (__v4sf) __B,
8742 _mm_setzero_pd (),
8743 __U,
8744 __R);
8745}
756c5857
AI
8746#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Every expansion is
   wrapped in an outer pair of parentheses so that the leading cast and the
   builtin call form one primary expression; otherwise a postfix operator
   following the macro call would bind to the call rather than to the cast
   result.  Arguments are parenthesized uniformly.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_mask_round ((A), (B), \
      _mm_setzero_ps (), (U), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_mask_round ((A), (B), \
      _mm_setzero_pd (), (U), (C)))
8775
756c5857
AI
8776#endif
8777
93103603
SP
/* Masked scalar conversions without an explicit rounding operand: each
   one forwards to the matching *_cvt_round* form with
   _MM_FROUND_CUR_DIRECTION as the final argument.  */
#define _mm_mask_cvtss_sd(W, U, A, B) \
  (_mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_cvtss_sd(U, A, B) \
  (_mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cvtsd_ss(W, U, A, B) \
  (_mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_cvtsd_ss(U, A, B) \
  (_mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
8789
756c5857
AI
8790extern __inline void
8791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8792_mm512_stream_si512 (__m512i * __P, __m512i __A)
8793{
8794 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8795}
8796
8797extern __inline void
8798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8799_mm512_stream_ps (float *__P, __m512 __A)
8800{
8801 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8802}
8803
8804extern __inline void
8805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8806_mm512_stream_pd (double *__P, __m512d __A)
8807{
8808 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8809}
8810
c56a42b9
KY
8811extern __inline __m512i
8812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813_mm512_stream_load_si512 (void *__P)
8814{
8815 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8816}
8817
9c3c2608
UB
/* Constants for mantissa extraction: the normalization interval.  The
   getmant intrinsics in this file OR this value into the low bits of the
   immediate they build.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,      /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,     /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,     /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8826
/* Constants for mantissa extraction: sign handling.  The getmant
   intrinsics in this file shift this value left by two before OR-ing it
   with the normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,      /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,     /* sign = 0 */
  _MM_MANT_SIGN_nan = 2       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8833
756c5857 8834#ifdef __OPTIMIZE__
075691af
AI
8835extern __inline __m128
8836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8838{
8839 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8840 (__v4sf) __B,
8841 __R);
8842}
8843
68d872d7
SP
8844extern __inline __m128
8845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8847 __m128 __B, const int __R)
8848{
8849 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8850 (__v4sf) __B,
8851 (__v4sf) __W,
8852 (__mmask8) __U, __R);
8853}
8854
8855extern __inline __m128
8856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8858 const int __R)
8859{
8860 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8861 (__v4sf) __B,
8862 (__v4sf)
8863 _mm_setzero_ps (),
8864 (__mmask8) __U, __R);
8865}
8866
075691af
AI
8867extern __inline __m128d
8868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8869_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8870{
8871 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8872 (__v2df) __B,
8873 __R);
8874}
8875
68d872d7
SP
8876extern __inline __m128d
8877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8878_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8879 __m128d __B, const int __R)
8880{
8881 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8882 (__v2df) __B,
8883 (__v2df) __W,
8884 (__mmask8) __U, __R);
8885}
8886
8887extern __inline __m128d
8888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8889_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8890 const int __R)
8891{
8892 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8893 (__v2df) __B,
8894 (__v2df)
8895 _mm_setzero_pd (),
8896 (__mmask8) __U, __R);
8897}
8898
756c5857
AI
8899extern __inline __m512
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm512_getexp_round_ps (__m512 __A, const int __R)
8902{
8903 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8904 (__v16sf)
0b192937 8905 _mm512_undefined_ps (),
756c5857
AI
8906 (__mmask16) -1, __R);
8907}
8908
8909extern __inline __m512
8910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8911_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8912 const int __R)
8913{
8914 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8915 (__v16sf) __W,
8916 (__mmask16) __U, __R);
8917}
8918
8919extern __inline __m512
8920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8921_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8922{
8923 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8924 (__v16sf)
8925 _mm512_setzero_ps (),
8926 (__mmask16) __U, __R);
8927}
8928
8929extern __inline __m512d
8930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8931_mm512_getexp_round_pd (__m512d __A, const int __R)
8932{
8933 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8934 (__v8df)
0b192937 8935 _mm512_undefined_pd (),
756c5857
AI
8936 (__mmask8) -1, __R);
8937}
8938
8939extern __inline __m512d
8940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8941_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8942 const int __R)
8943{
8944 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8945 (__v8df) __W,
8946 (__mmask8) __U, __R);
8947}
8948
8949extern __inline __m512d
8950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8952{
8953 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8954 (__v8df)
8955 _mm512_setzero_pd (),
8956 (__mmask8) __U, __R);
8957}
8958
756c5857
AI
8959extern __inline __m512d
8960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8961_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8962 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8963{
8964 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8965 (__C << 2) | __B,
0b192937 8966 _mm512_undefined_pd (),
756c5857
AI
8967 (__mmask8) -1, __R);
8968}
8969
8970extern __inline __m512d
8971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8972_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8973 _MM_MANTISSA_NORM_ENUM __B,
8974 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8975{
8976 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8977 (__C << 2) | __B,
8978 (__v8df) __W, __U,
8979 __R);
8980}
8981
8982extern __inline __m512d
8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8985 _MM_MANTISSA_NORM_ENUM __B,
8986 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8987{
8988 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8989 (__C << 2) | __B,
8990 (__v8df)
8991 _mm512_setzero_pd (),
8992 __U, __R);
8993}
8994
8995extern __inline __m512
8996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8997_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8998 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8999{
9000 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9001 (__C << 2) | __B,
0b192937 9002 _mm512_undefined_ps (),
756c5857
AI
9003 (__mmask16) -1, __R);
9004}
9005
9006extern __inline __m512
9007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9008_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
9009 _MM_MANTISSA_NORM_ENUM __B,
9010 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9011{
9012 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9013 (__C << 2) | __B,
9014 (__v16sf) __W, __U,
9015 __R);
9016}
9017
9018extern __inline __m512
9019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9020_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
9021 _MM_MANTISSA_NORM_ENUM __B,
9022 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9023{
9024 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9025 (__C << 2) | __B,
9026 (__v16sf)
9027 _mm512_setzero_ps (),
9028 __U, __R);
9029}
9030
075691af
AI
9031extern __inline __m128d
9032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033_mm_getmant_round_sd (__m128d __A, __m128d __B,
9034 _MM_MANTISSA_NORM_ENUM __C,
9035 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9036{
9037 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
9038 (__v2df) __B,
9039 (__D << 2) | __C,
9040 __R);
9041}
9042
68d872d7
SP
9043extern __inline __m128d
9044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
9046 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
9047 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9048{
9049 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
9050 (__v2df) __B,
9051 (__D << 2) | __C,
9052 (__v2df) __W,
9053 __U, __R);
9054}
9055
9056extern __inline __m128d
9057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9058_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
9059 _MM_MANTISSA_NORM_ENUM __C,
9060 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9061{
9062 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
9063 (__v2df) __B,
9064 (__D << 2) | __C,
9065 (__v2df)
9066 _mm_setzero_pd(),
9067 __U, __R);
9068}
9069
075691af
AI
9070extern __inline __m128
9071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072_mm_getmant_round_ss (__m128 __A, __m128 __B,
9073 _MM_MANTISSA_NORM_ENUM __C,
9074 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9075{
9076 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
9077 (__v4sf) __B,
9078 (__D << 2) | __C,
9079 __R);
9080}
9081
68d872d7
SP
9082extern __inline __m128
9083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9084_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
9085 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
9086 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9087{
9088 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
9089 (__v4sf) __B,
9090 (__D << 2) | __C,
9091 (__v4sf) __W,
9092 __U, __R);
9093}
9094
9095extern __inline __m128
9096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
9098 _MM_MANTISSA_NORM_ENUM __C,
9099 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9100{
9101 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
9102 (__v4sf) __B,
9103 (__D << 2) | __C,
9104 (__v4sf)
9105 _mm_setzero_ps(),
9106 __U, __R);
9107}
9108
756c5857
AI
9109#else
/* Macro forms of the getmant intrinsics, used when __OPTIMIZE__ is not
   defined.  In every macro the builtin's selector operand is
   (sign << 2) | interval, cast to int.  */

/* 512-bit double precision.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)_mm512_undefined_pd(), (__mmask8)-1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U), (R)))

/* 512-bit single precision.  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)_mm512_undefined_ps(), (__mmask16)-1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)_mm512_setzero_ps(), (__mmask16)(U), (R)))

/* Scalar double precision.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), (R)))

#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_mask_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (R)))

#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_mask_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U), (R)))

/* Scalar single precision.  */
#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), (R)))

#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_mask_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (R)))

#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_mask_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U), (R)))
9193
075691af
AI
/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__ is
   not defined.  The mask/maskz variants are wrapped in an outer pair of
   parentheses like the unmasked ones: with a bare leading cast, a postfix
   operator following the macro call would bind to the builtin call
   instead of to the cast result.  Arguments are parenthesized
   uniformly.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), (R)))

#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_getexp_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), (R)))

#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_getexp_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))
9211
9212
756c5857
AI
/* Macro forms of the 512-bit getexp intrinsics, used when __OPTIMIZE__ is
   not defined.  Unmasked variants pass an undefined merge vector with an
   all-ones mask; maskz variants pass an all-zero merge vector.  */

/* Single precision.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      R))

/* Double precision.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      R))
9236#endif
9237
9238#ifdef __OPTIMIZE__
9239extern __inline __m512
9240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
9242{
9243 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
9244 (__v16sf)
9245 _mm512_undefined_ps (),
9246 -1, __R);
756c5857
AI
9247}
9248
9249extern __inline __m512
9250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9251_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
9252 const int __imm, const int __R)
9253{
9254 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
9255 (__v16sf) __A,
9256 (__mmask16) __B, __R);
9257}
9258
9259extern __inline __m512
9260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9261_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
9262 const int __imm, const int __R)
9263{
9264 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
9265 __imm,
9266 (__v16sf)
9267 _mm512_setzero_ps (),
9268 (__mmask16) __A, __R);
9269}
9270
9271extern __inline __m512d
9272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9273_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
9274{
9275 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
9276 (__v8df)
9277 _mm512_undefined_pd (),
9278 -1, __R);
756c5857
AI
9279}
9280
9281extern __inline __m512d
9282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
9284 __m512d __C, const int __imm, const int __R)
9285{
9286 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
9287 (__v8df) __A,
9288 (__mmask8) __B, __R);
9289}
9290
9291extern __inline __m512d
9292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9293_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
9294 const int __imm, const int __R)
9295{
9296 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
9297 __imm,
9298 (__v8df)
9299 _mm512_setzero_pd (),
9300 (__mmask8) __A, __R);
9301}
075691af
AI
9302
9303extern __inline __m128
9304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
a7c4d6d1
HL
9305_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
9306 const int __R)
9307{
9308 return (__m128)
9309 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
9310 (__v4sf) __B, __imm,
9311 (__v4sf)
9312 _mm_setzero_ps (),
9313 (__mmask8) -1,
9314 __R);
9315}
9316
9317extern __inline __m128
9318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
9320 __m128 __D, const int __imm, const int __R)
075691af 9321{
a7c4d6d1
HL
9322 return (__m128)
9323 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
9324 (__v4sf) __D, __imm,
9325 (__v4sf) __A,
9326 (__mmask8) __B,
9327 __R);
9328}
9329
9330extern __inline __m128
9331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
9333 const int __imm, const int __R)
9334{
9335 return (__m128)
9336 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
9337 (__v4sf) __C, __imm,
9338 (__v4sf)
9339 _mm_setzero_ps (),
9340 (__mmask8) __A,
9341 __R);
075691af
AI
9342}
9343
9344extern __inline __m128d
9345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9347 const int __R)
9348{
a7c4d6d1
HL
9349 return (__m128d)
9350 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
9351 (__v2df) __B, __imm,
9352 (__v2df)
9353 _mm_setzero_pd (),
9354 (__mmask8) -1,
9355 __R);
9356}
9357
9358extern __inline __m128d
9359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9360_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
9361 __m128d __D, const int __imm, const int __R)
9362{
9363 return (__m128d)
9364 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
9365 (__v2df) __D, __imm,
9366 (__v2df) __A,
9367 (__mmask8) __B,
9368 __R);
9369}
9370
9371extern __inline __m128d
9372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
9374 const int __imm, const int __R)
9375{
9376 return (__m128d)
9377 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
9378 (__v2df) __C, __imm,
9379 (__v2df)
9380 _mm_setzero_pd (),
9381 (__mmask8) __A,
9382 __R);
075691af
AI
9383}
9384
756c5857
AI
9385#else
/* Macro form: input A, immediate B, an undefined vector, an all-ones
   mask and R are handed to the rndscaleps builtin.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) \
   __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
                                   (int) (B), \
                                   (__v16sf) _mm512_undefined_ps (), \
                                   (__mmask16) (-1), \
                                   (R)))
756c5857
AI
/* Macro form: input C, immediate D, vector A after the immediate,
   mask B and R are handed to the rndscaleps builtin.  */
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) \
   __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
                                   (int) (D), \
                                   (__v16sf) (__m512) (A), \
                                   (__mmask16) (B), \
                                   (R)))
/* Macro form: input B, immediate C, a zero vector, mask A and R are
   handed to the rndscaleps builtin.  */
#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) \
   __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
                                   (int) (C), \
                                   (__v16sf) _mm512_setzero_ps (), \
                                   (__mmask16) (A), \
                                   (R)))
/* Macro form: input A, immediate B, an undefined vector, an all-ones
   mask and R are handed to the rndscalepd builtin.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) \
   __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
                                   (int) (B), \
                                   (__v8df) _mm512_undefined_pd (), \
                                   (__mmask8) (-1), \
                                   (R)))
756c5857
AI
/* Macro form: input C, immediate D, vector A after the immediate,
   mask B and R are handed to the rndscalepd builtin.  */
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) \
   __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
                                   (int) (D), \
                                   (__v8df) (__m512d) (A), \
                                   (__mmask8) (B), \
                                   (R)))
/* Macro form: input B, immediate C, a zero vector, mask A and R are
   handed to the rndscalepd builtin.  */
#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) \
   __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
                                   (int) (C), \
                                   (__v8df) _mm512_setzero_pd (), \
                                   (__mmask8) (A), \
                                   (R)))
a7c4d6d1
HL
/* Macro form of the unmasked scalar-float roundscale: zero vector and
   all-ones mask are supplied here.  */
#define _mm_roundscale_round_ss(A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (-1), (int) (R)))
/* Macro form of the masked scalar-float roundscale: inputs B and C,
   vector A after the immediate, mask U.  */
#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (B), (__v4sf) (__m128) (C), (int) (I), \
     (__v4sf) (__m128) (A), (__mmask8) (U), (int) (R)))
/* Macro form of the masked scalar-float roundscale that supplies a zero
   vector: inputs A and B, mask U.  */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), (int) (R)))
/* Macro form of the unmasked scalar-double roundscale: zero vector and
   all-ones mask are supplied here.  */
#define _mm_roundscale_round_sd(A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (-1), (int) (R)))
/* Macro form of the masked scalar-double roundscale: inputs B and C,
   vector A after the immediate, mask U.  */
#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (B), (__v2df) (__m128d) (C), (int) (I), \
     (__v2df) (__m128d) (A), (__mmask8) (U), (int) (R)))
/* Macro form of the masked scalar-double roundscale that supplies a zero
   vector: inputs A and B, mask U.  */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), (int) (R)))
756c5857
AI
9460#endif
9461
9462extern __inline __m512
9463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9464_mm512_floor_ps (__m512 __A)
9465{
9466 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9467 _MM_FROUND_FLOOR,
9468 (__v16sf) __A, -1,
9469 _MM_FROUND_CUR_DIRECTION);
9470}
9471
9472extern __inline __m512d
9473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474_mm512_floor_pd (__m512d __A)
9475{
9476 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9477 _MM_FROUND_FLOOR,
9478 (__v8df) __A, -1,
9479 _MM_FROUND_CUR_DIRECTION);
9480}
9481
9482extern __inline __m512
9483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484_mm512_ceil_ps (__m512 __A)
9485{
9486 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9487 _MM_FROUND_CEIL,
9488 (__v16sf) __A, -1,
9489 _MM_FROUND_CUR_DIRECTION);
9490}
9491
9492extern __inline __m512d
9493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9494_mm512_ceil_pd (__m512d __A)
9495{
9496 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9497 _MM_FROUND_CEIL,
9498 (__v8df) __A, -1,
9499 _MM_FROUND_CUR_DIRECTION);
9500}
9501
9502extern __inline __m512
9503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9504_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9505{
9506 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9507 _MM_FROUND_FLOOR,
9508 (__v16sf) __W, __U,
9509 _MM_FROUND_CUR_DIRECTION);
9510}
9511
9512extern __inline __m512d
9513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9514_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9515{
9516 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9517 _MM_FROUND_FLOOR,
9518 (__v8df) __W, __U,
9519 _MM_FROUND_CUR_DIRECTION);
9520}
9521
9522extern __inline __m512
9523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9525{
9526 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9527 _MM_FROUND_CEIL,
9528 (__v16sf) __W, __U,
9529 _MM_FROUND_CUR_DIRECTION);
9530}
9531
9532extern __inline __m512d
9533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9534_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9535{
9536 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9537 _MM_FROUND_CEIL,
9538 (__v8df) __W, __U,
9539 _MM_FROUND_CUR_DIRECTION);
9540}
9541
756c5857 9542#ifdef __OPTIMIZE__
756c5857
AI
9543extern __inline __m512i
9544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9545_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9546{
9547 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9548 (__v16si) __B, __imm,
9549 (__v16si)
4271e5cb 9550 _mm512_undefined_epi32 (),
756c5857
AI
9551 (__mmask16) -1);
9552}
9553
9554extern __inline __m512i
9555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9556_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9557 __m512i __B, const int __imm)
9558{
9559 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9560 (__v16si) __B, __imm,
9561 (__v16si) __W,
9562 (__mmask16) __U);
9563}
9564
9565extern __inline __m512i
9566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9567_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9568 const int __imm)
9569{
9570 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9571 (__v16si) __B, __imm,
9572 (__v16si)
9573 _mm512_setzero_si512 (),
9574 (__mmask16) __U);
9575}
9576
9577extern __inline __m512i
9578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9579_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9580{
9581 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9582 (__v8di) __B, __imm,
9583 (__v8di)
4271e5cb 9584 _mm512_undefined_epi32 (),
756c5857
AI
9585 (__mmask8) -1);
9586}
9587
9588extern __inline __m512i
9589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9590_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9591 __m512i __B, const int __imm)
9592{
9593 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9594 (__v8di) __B, __imm,
9595 (__v8di) __W,
9596 (__mmask8) __U);
9597}
9598
9599extern __inline __m512i
9600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9601_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9602 const int __imm)
9603{
9604 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9605 (__v8di) __B, __imm,
9606 (__v8di)
9607 _mm512_setzero_si512 (),
9608 (__mmask8) __U);
9609}
9610#else
756c5857
AI
/* Macro form: alignd512 builtin on X, Y and immediate C with an
   undefined vector and an all-ones mask.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                  (__v16si) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v16si) _mm512_undefined_epi32 (), \
                                  (__mmask16) -1))
9615
/* Macro form: alignd512 builtin on X, Y and immediate C with W as fourth
   operand and U as the mask.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                  (__v16si) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v16si) (__m512i) (W), \
                                  (__mmask16) (U)))
9620
/* Macro form: alignd512 builtin on X, Y and immediate C with a zero
   vector as fourth operand and U as the mask.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                  (__v16si) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v16si) _mm512_setzero_si512 (), \
                                  (__mmask16) (U)))
9625
/* Macro form: alignq512 builtin on X, Y and immediate C with an
   undefined vector and an all-ones mask.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                  (__v8di) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v8di) _mm512_undefined_epi32 (), \
                                  (__mmask8) -1))
756c5857
AI
9630
/* Macro form: alignq512 builtin on X, Y and immediate C with W as fourth
   operand and U as the mask.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                  (__v8di) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v8di) (__m512i) (W), \
                                  (__mmask8) (U)))
9634
/* Macro form: alignq512 builtin on X, Y and immediate C with a zero
   vector as fourth operand and U as the mask.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                  (__v8di) (__m512i) (Y), \
                                  (int) (C), \
                                  (__v8di) _mm512_setzero_si512 (), \
                                  (__mmask8) (U)))
9639#endif
9640
9641extern __inline __mmask16
9642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9643_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9644{
9645 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9646 (__v16si) __B,
9647 (__mmask16) -1);
9648}
9649
9650extern __inline __mmask16
9651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9653{
9654 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9655 (__v16si) __B, __U);
9656}
9657
9658extern __inline __mmask8
9659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9660_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9661{
9662 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9663 (__v8di) __B, __U);
9664}
9665
9666extern __inline __mmask8
9667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9668_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9669{
9670 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9671 (__v8di) __B,
9672 (__mmask8) -1);
9673}
9674
9675extern __inline __mmask16
9676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9677_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9678{
9679 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9680 (__v16si) __B,
9681 (__mmask16) -1);
9682}
9683
9684extern __inline __mmask16
9685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9686_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9687{
9688 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9689 (__v16si) __B, __U);
9690}
9691
9692extern __inline __mmask8
9693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9695{
9696 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9697 (__v8di) __B, __U);
9698}
9699
9700extern __inline __mmask8
9701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9702_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9703{
9704 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9705 (__v8di) __B,
9706 (__mmask8) -1);
9707}
9708
d256b866
IT
9709extern __inline __mmask16
9710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9711_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9712{
9713 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9714 (__v16si) __Y, 5,
9715 (__mmask16) -1);
9716}
9717
275be1da
IT
9718extern __inline __mmask16
9719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9720_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9721{
9722 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9723 (__v16si) __Y, 5,
9724 (__mmask16) __M);
9725}
9726
9727extern __inline __mmask16
9728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9730{
9731 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9732 (__v16si) __Y, 5,
9733 (__mmask16) __M);
9734}
9735
d256b866
IT
9736extern __inline __mmask16
9737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9738_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9739{
9740 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9741 (__v16si) __Y, 5,
9742 (__mmask16) -1);
9743}
9744
275be1da
IT
9745extern __inline __mmask8
9746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9747_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9748{
9749 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9750 (__v8di) __Y, 5,
9751 (__mmask8) __M);
9752}
9753
d256b866
IT
9754extern __inline __mmask8
9755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9756_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9757{
9758 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9759 (__v8di) __Y, 5,
9760 (__mmask8) -1);
9761}
9762
275be1da
IT
9763extern __inline __mmask8
9764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9765_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9766{
9767 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9768 (__v8di) __Y, 5,
9769 (__mmask8) __M);
9770}
9771
d256b866
IT
9772extern __inline __mmask8
9773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9775{
9776 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9777 (__v8di) __Y, 5,
9778 (__mmask8) -1);
9779}
9780
275be1da
IT
9781extern __inline __mmask16
9782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9783_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9784{
9785 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9786 (__v16si) __Y, 2,
9787 (__mmask16) __M);
9788}
9789
d256b866
IT
9790extern __inline __mmask16
9791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9792_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9793{
9794 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9795 (__v16si) __Y, 2,
9796 (__mmask16) -1);
9797}
9798
275be1da
IT
9799extern __inline __mmask16
9800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9801_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9802{
9803 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9804 (__v16si) __Y, 2,
9805 (__mmask16) __M);
9806}
9807
d256b866
IT
9808extern __inline __mmask16
9809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9811{
9812 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9813 (__v16si) __Y, 2,
9814 (__mmask16) -1);
9815}
9816
275be1da
IT
9817extern __inline __mmask8
9818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9819_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9820{
9821 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9822 (__v8di) __Y, 2,
9823 (__mmask8) __M);
9824}
9825
d256b866
IT
9826extern __inline __mmask8
9827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9828_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9829{
9830 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9831 (__v8di) __Y, 2,
9832 (__mmask8) -1);
9833}
9834
275be1da
IT
9835extern __inline __mmask8
9836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9838{
9839 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9840 (__v8di) __Y, 2,
9841 (__mmask8) __M);
9842}
9843
d256b866
IT
9844extern __inline __mmask8
9845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9846_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9847{
9848 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9849 (__v8di) __Y, 2,
9850 (__mmask8) -1);
9851}
9852
275be1da
IT
9853extern __inline __mmask16
9854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9855_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9856{
9857 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9858 (__v16si) __Y, 1,
9859 (__mmask16) __M);
9860}
9861
d256b866
IT
9862extern __inline __mmask16
9863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9864_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9865{
9866 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9867 (__v16si) __Y, 1,
9868 (__mmask16) -1);
9869}
9870
275be1da
IT
9871extern __inline __mmask16
9872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9873_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9874{
9875 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9876 (__v16si) __Y, 1,
9877 (__mmask16) __M);
9878}
9879
d256b866
IT
9880extern __inline __mmask16
9881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9882_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9883{
9884 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9885 (__v16si) __Y, 1,
9886 (__mmask16) -1);
9887}
9888
275be1da
IT
9889extern __inline __mmask8
9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9892{
9893 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9894 (__v8di) __Y, 1,
9895 (__mmask8) __M);
9896}
9897
d256b866
IT
9898extern __inline __mmask8
9899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9900_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9901{
9902 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9903 (__v8di) __Y, 1,
9904 (__mmask8) -1);
9905}
9906
275be1da
IT
9907extern __inline __mmask8
9908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9909_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9910{
9911 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9912 (__v8di) __Y, 1,
9913 (__mmask8) __M);
9914}
9915
d256b866
IT
9916extern __inline __mmask8
9917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9919{
9920 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9921 (__v8di) __Y, 1,
9922 (__mmask8) -1);
9923}
9924
9925extern __inline __mmask16
9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9928{
9929 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9930 (__v16si) __Y, 4,
9931 (__mmask16) -1);
9932}
9933
275be1da
IT
9934extern __inline __mmask16
9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9937{
9938 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9939 (__v16si) __Y, 4,
9940 (__mmask16) __M);
9941}
9942
9943extern __inline __mmask16
9944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9945_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9946{
9947 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9948 (__v16si) __Y, 4,
9949 (__mmask16) __M);
9950}
9951
d256b866
IT
9952extern __inline __mmask16
9953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9954_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9955{
9956 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9957 (__v16si) __Y, 4,
9958 (__mmask16) -1);
9959}
9960
275be1da
IT
9961extern __inline __mmask8
9962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 9963_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
275be1da
IT
9964{
9965 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9966 (__v8di) __Y, 4,
9967 (__mmask8) __M);
9968}
9969
d256b866
IT
9970extern __inline __mmask8
9971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9972_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9973{
9974 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9975 (__v8di) __Y, 4,
9976 (__mmask8) -1);
9977}
9978
275be1da
IT
9979extern __inline __mmask8
9980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9981_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9982{
9983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9984 (__v8di) __Y, 4,
9985 (__mmask8) __M);
9986}
9987
d256b866
IT
9988extern __inline __mmask8
9989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9990_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9991{
9992 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9993 (__v8di) __Y, 4,
9994 (__mmask8) -1);
9995}
9996
756c5857
AI
/* Named values for the integer-compare predicate immediate.  The
   cmplt/cmple/cmpneq/cmpge wrappers earlier in this header hard-code
   1, 2, 4 and 5 respectively.  NLT and GE share 0x5; NLE and GT share
   0x6.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
10006
10007#ifdef __OPTIMIZE__
d8ea3e7c
AS
10008extern __inline __mmask16
10009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10010_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
10011{
10012 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
10013 (__mmask8) __B);
10014}
10015
10016extern __inline __mmask16
10017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10018_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
10019{
10020 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
10021 (__mmask8) __B);
10022}
10023
756c5857
AI
10024extern __inline __mmask8
10025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
10027{
10028 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10029 (__v8di) __Y, __P,
10030 (__mmask8) -1);
10031}
10032
10033extern __inline __mmask16
10034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10035_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
10036{
10037 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
10038 (__v16si) __Y, __P,
10039 (__mmask16) -1);
10040}
10041
10042extern __inline __mmask8
10043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
10045{
10046 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10047 (__v8di) __Y, __P,
10048 (__mmask8) -1);
10049}
10050
10051extern __inline __mmask16
10052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
10054{
10055 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10056 (__v16si) __Y, __P,
10057 (__mmask16) -1);
10058}
10059
10060extern __inline __mmask8
10061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10062_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
10063 const int __R)
10064{
10065 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
10066 (__v8df) __Y, __P,
10067 (__mmask8) -1, __R);
10068}
10069
10070extern __inline __mmask16
10071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
10073{
10074 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
10075 (__v16sf) __Y, __P,
10076 (__mmask16) -1, __R);
10077}
10078
10079extern __inline __mmask8
10080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
10082 const int __P)
10083{
10084 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10085 (__v8di) __Y, __P,
10086 (__mmask8) __U);
10087}
10088
10089extern __inline __mmask16
10090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10091_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
10092 const int __P)
10093{
10094 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
10095 (__v16si) __Y, __P,
10096 (__mmask16) __U);
10097}
10098
10099extern __inline __mmask8
10100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10101_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
10102 const int __P)
10103{
10104 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10105 (__v8di) __Y, __P,
10106 (__mmask8) __U);
10107}
10108
10109extern __inline __mmask16
10110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10111_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
10112 const int __P)
10113{
10114 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10115 (__v16si) __Y, __P,
10116 (__mmask16) __U);
10117}
10118
10119extern __inline __mmask8
10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
10122 const int __P, const int __R)
10123{
10124 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
10125 (__v8df) __Y, __P,
10126 (__mmask8) __U, __R);
10127}
10128
10129extern __inline __mmask16
10130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10131_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
10132 const int __P, const int __R)
10133{
10134 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
10135 (__v16sf) __Y, __P,
10136 (__mmask16) __U, __R);
10137}
10138
10139extern __inline __mmask8
10140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10141_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
10142{
10143 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10144 (__v2df) __Y, __P,
10145 (__mmask8) -1, __R);
10146}
10147
10148extern __inline __mmask8
10149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10150_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
10151 const int __P, const int __R)
10152{
10153 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10154 (__v2df) __Y, __P,
10155 (__mmask8) __M, __R);
10156}
10157
10158extern __inline __mmask8
10159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10160_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
10161{
10162 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10163 (__v4sf) __Y, __P,
10164 (__mmask8) -1, __R);
10165}
10166
10167extern __inline __mmask8
10168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10169_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
10170 const int __P, const int __R)
10171{
10172 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10173 (__v4sf) __Y, __P,
10174 (__mmask8) __M, __R);
10175}
10176
10177#else
d8ea3e7c
AS
/* Macro form: kshiftlihi builtin on X with count Y narrowed to
   __mmask8.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) \
   __builtin_ia32_kshiftlihi ((__mmask16) (X), (__mmask8) (Y)))
10180
/* Macro form: kshiftrihi builtin on X with count Y narrowed to
   __mmask8.  */
#define _kshiftri_mask16(X, Y) \
  ((__mmask16) \
   __builtin_ia32_kshiftrihi ((__mmask16) (X), (__mmask8) (Y)))
10183
756c5857
AI
/* Macro form: cmpq512 builtin on X and Y with predicate P and an
   all-ones mask.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X), \
                                (__v8di) (__m512i) (Y), \
                                (int) (P), \
                                (__mmask8) -1))
10188
/* Macro form: cmpd512 builtin on X and Y with predicate P and an
   all-ones mask.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X), \
                                (__v16si) (__m512i) (Y), \
                                (int) (P), \
                                (__mmask16) -1))
756c5857
AI
10193
/* Macro form: ucmpq512 builtin on X and Y with predicate P and an
   all-ones mask.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X), \
                                 (__v8di) (__m512i) (Y), \
                                 (int) (P), \
                                 (__mmask8) -1))
10198
/* Macro form: ucmpd512 builtin on X and Y with predicate P and an
   all-ones mask.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X), \
                                 (__v16si) (__m512i) (Y), \
                                 (int) (P), \
                                 (__mmask16) -1))
756c5857 10203
/* Macro form: cmppd512 builtin on X and Y with predicate P, an all-ones
   mask and trailing operand R.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                 (__v8df) (__m512d) (Y), \
                                 (int) (P), \
                                 (__mmask8) -1, \
                                 (R)))
10208
/* Macro form: cmpps512 builtin on X and Y with predicate P, an all-ones
   mask and trailing operand R.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                 (__v16sf) (__m512) (Y), \
                                 (int) (P), \
                                 (__mmask16) -1, \
                                 (R)))
10213
/* Macro form: cmpq512 builtin on X and Y with predicate P and M as the
   mask.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X), \
                                (__v8di) (__m512i) (Y), \
                                (int) (P), \
                                (__mmask8) (M)))
756c5857 10218
383321ec
UB
/* Macro form: cmpd512 builtin on X and Y with predicate P and M as the
   mask.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X), \
                                (__v16si) (__m512i) (Y), \
                                (int) (P), \
                                (__mmask16) (M)))
756c5857 10223
/* Macro form: ucmpq512 builtin on X and Y with predicate P and M as the
   mask.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X), \
                                 (__v8di) (__m512i) (Y), \
                                 (int) (P), \
                                 (__mmask8) (M)))
756c5857 10228
383321ec
UB
/* Macro form: ucmpd512 builtin on X and Y with predicate P and M as the
   mask.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X), \
                                 (__v16si) (__m512i) (Y), \
                                 (int) (P), \
                                 (__mmask16) (M)))
756c5857 10233
/* Macro form: cmppd512 builtin on X and Y with predicate P, mask M and
   trailing operand R.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                 (__v8df) (__m512d) (Y), \
                                 (int) (P), \
                                 (__mmask8) (M), \
                                 (R)))
756c5857 10238
/* Macro form: cmpps512 builtin on X and Y with predicate P, mask M and
   trailing operand R.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                 (__v16sf) (__m512) (Y), \
                                 (int) (P), \
                                 (__mmask16) (M), \
                                 (R)))
756c5857 10243
/* Macro form: cmpsd_mask builtin on X and Y with predicate P, an
   all-ones mask and trailing operand R.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                              (__v2df) (__m128d) (Y), \
                              (int) (P), \
                              (__mmask8) -1, \
                              (R)))
10248
/* Macro form: cmpsd_mask builtin on X and Y with predicate P, mask M and
   trailing operand R.  M is cast to __mmask8 explicitly so this form
   agrees with the always-inline definition of the same intrinsic (whose
   mask parameter is __mmask8) and with the 512-bit masked-compare
   macros in this header.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                              (__v2df) (__m128d) (Y), \
                              (int) (P), \
                              (__mmask8) (M), \
                              R))
10253
/* Macro form: cmpss_mask builtin on X and Y with predicate P, an
   all-ones mask and trailing operand R.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                              (__v4sf) (__m128) (Y), \
                              (int) (P), \
                              (__mmask8) -1, \
                              (R)))
10258
/* Macro form: cmpss_mask builtin on X and Y with predicate P, mask M and
   trailing operand R.  M is cast to __mmask8 explicitly so this form
   agrees with the always-inline definition of the same intrinsic (whose
   mask parameter is __mmask8) and with the 512-bit masked-compare
   macros in this header.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                              (__v4sf) (__m128) (Y), \
                              (int) (P), \
                              (__mmask8) (M), \
                              R))
10263#endif
10264
10265#ifdef __OPTIMIZE__
10266extern __inline __m512
10267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10268_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10269{
b5fd0b71
JJ
10270 __m512 __v1_old = _mm512_undefined_ps ();
10271 __mmask16 __mask = 0xFFFF;
756c5857 10272
b5fd0b71 10273 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10274 __addr,
10275 (__v16si) __index,
b5fd0b71 10276 __mask, __scale);
756c5857
AI
10277}
10278
10279extern __inline __m512
10280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 10281_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 10282 __m512i __index, void const *__addr, int __scale)
756c5857 10283{
b5fd0b71 10284 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10285 __addr,
10286 (__v16si) __index,
10287 __mask, __scale);
10288}
10289
10290extern __inline __m512d
10291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10292_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 10293{
b5fd0b71
JJ
10294 __m512d __v1_old = _mm512_undefined_pd ();
10295 __mmask8 __mask = 0xFF;
756c5857 10296
b5fd0b71 10297 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 10298 __addr,
b5fd0b71 10299 (__v8si) __index, __mask,
756c5857
AI
10300 __scale);
10301}
10302
10303extern __inline __m512d
10304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10306 __m256i __index, void const *__addr, int __scale)
756c5857
AI
10307{
10308 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
10309 __addr,
10310 (__v8si) __index,
10311 __mask, __scale);
10312}
10313
10314extern __inline __m256
10315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10316_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10317{
b5fd0b71
JJ
10318 __m256 __v1_old = _mm256_undefined_ps ();
10319 __mmask8 __mask = 0xFF;
756c5857 10320
b5fd0b71 10321 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 10322 __addr,
b5fd0b71 10323 (__v8di) __index, __mask,
756c5857
AI
10324 __scale);
10325}
10326
10327extern __inline __m256
10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 10330 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10331{
10332 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
10333 __addr,
10334 (__v8di) __index,
10335 __mask, __scale);
10336}
10337
10338extern __inline __m512d
10339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10340_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 10341{
b5fd0b71
JJ
10342 __m512d __v1_old = _mm512_undefined_pd ();
10343 __mmask8 __mask = 0xFF;
756c5857 10344
b5fd0b71 10345 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 10346 __addr,
b5fd0b71 10347 (__v8di) __index, __mask,
756c5857
AI
10348 __scale);
10349}
10350
10351extern __inline __m512d
10352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10353_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10354 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10355{
10356 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10357 __addr,
10358 (__v8di) __index,
10359 __mask, __scale);
10360}
10361
10362extern __inline __m512i
10363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10364_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10365{
b5fd0b71
JJ
10366 __m512i __v1_old = _mm512_undefined_epi32 ();
10367 __mmask16 __mask = 0xFFFF;
756c5857 10368
b5fd0b71 10369 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
10370 __addr,
10371 (__v16si) __index,
b5fd0b71 10372 __mask, __scale);
756c5857
AI
10373}
10374
10375extern __inline __m512i
10376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10377_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 10378 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10379{
10380 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10381 __addr,
10382 (__v16si) __index,
10383 __mask, __scale);
10384}
10385
10386extern __inline __m512i
10387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10388_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 10389{
b5fd0b71
JJ
10390 __m512i __v1_old = _mm512_undefined_epi32 ();
10391 __mmask8 __mask = 0xFF;
756c5857 10392
b5fd0b71 10393 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 10394 __addr,
b5fd0b71 10395 (__v8si) __index, __mask,
756c5857
AI
10396 __scale);
10397}
10398
10399extern __inline __m512i
10400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10402 __m256i __index, void const *__addr,
756c5857
AI
10403 int __scale)
10404{
10405 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10406 __addr,
10407 (__v8si) __index,
10408 __mask, __scale);
10409}
10410
10411extern __inline __m256i
10412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10413_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10414{
b5fd0b71
JJ
10415 __m256i __v1_old = _mm256_undefined_si256 ();
10416 __mmask8 __mask = 0xFF;
756c5857 10417
b5fd0b71 10418 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
10419 __addr,
10420 (__v8di) __index,
b5fd0b71 10421 __mask, __scale);
756c5857
AI
10422}
10423
10424extern __inline __m256i
10425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10426_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 10427 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10428{
10429 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10430 __addr,
10431 (__v8di) __index,
10432 __mask, __scale);
10433}
10434
10435extern __inline __m512i
10436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10437_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 10438{
b5fd0b71
JJ
10439 __m512i __v1_old = _mm512_undefined_epi32 ();
10440 __mmask8 __mask = 0xFF;
756c5857 10441
b5fd0b71 10442 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 10443 __addr,
b5fd0b71 10444 (__v8di) __index, __mask,
756c5857
AI
10445 __scale);
10446}
10447
10448extern __inline __m512i
10449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10450_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10451 __m512i __index, void const *__addr,
756c5857
AI
10452 int __scale)
10453{
10454 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10455 __addr,
10456 (__v8di) __index,
10457 __mask, __scale);
10458}
10459
10460extern __inline void
10461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10462_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
10463{
10464 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10465 (__v16si) __index, (__v16sf) __v1, __scale);
10466}
10467
10468extern __inline void
10469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10470_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
10471 __m512i __index, __m512 __v1, int __scale)
10472{
10473 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10474 (__v16sf) __v1, __scale);
10475}
10476
10477extern __inline void
10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10479_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
10480 int __scale)
10481{
10482 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10483 (__v8si) __index, (__v8df) __v1, __scale);
10484}
10485
10486extern __inline void
10487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10488_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10489 __m256i __index, __m512d __v1, int __scale)
10490{
10491 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10492 (__v8df) __v1, __scale);
10493}
10494
10495extern __inline void
10496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10497_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
10498{
10499 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10500 (__v8di) __index, (__v8sf) __v1, __scale);
10501}
10502
10503extern __inline void
10504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10505_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
10506 __m512i __index, __m256 __v1, int __scale)
10507{
10508 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10509 (__v8sf) __v1, __scale);
10510}
10511
10512extern __inline void
10513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10514_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
10515 int __scale)
10516{
10517 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10518 (__v8di) __index, (__v8df) __v1, __scale);
10519}
10520
10521extern __inline void
10522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10523_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10524 __m512i __index, __m512d __v1, int __scale)
10525{
10526 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10527 (__v8df) __v1, __scale);
10528}
10529
10530extern __inline void
10531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10532_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10533 __m512i __v1, int __scale)
10534{
10535 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10536 (__v16si) __index, (__v16si) __v1, __scale);
10537}
10538
10539extern __inline void
10540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10541_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
10542 __m512i __index, __m512i __v1, int __scale)
10543{
10544 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10545 (__v16si) __v1, __scale);
10546}
10547
10548extern __inline void
10549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10550_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
10551 __m512i __v1, int __scale)
10552{
10553 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10554 (__v8si) __index, (__v8di) __v1, __scale);
10555}
10556
10557extern __inline void
10558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10559_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10560 __m256i __index, __m512i __v1, int __scale)
10561{
10562 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10563 (__v8di) __v1, __scale);
10564}
10565
10566extern __inline void
10567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10568_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10569 __m256i __v1, int __scale)
10570{
10571 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10572 (__v8di) __index, (__v8si) __v1, __scale);
10573}
10574
10575extern __inline void
10576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10577_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
10578 __m512i __index, __m256i __v1, int __scale)
10579{
10580 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10581 (__v8si) __v1, __scale);
10582}
10583
10584extern __inline void
10585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10586_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
10587 __m512i __v1, int __scale)
10588{
10589 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10590 (__v8di) __index, (__v8di) __v1, __scale);
10591}
10592
10593extern __inline void
10594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10595_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10596 __m512i __index, __m512i __v1, int __scale)
10597{
10598 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10599 (__v8di) __v1, __scale);
10600}
10601#else
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFFFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)			\
					  _mm512_undefined_ps (),	\
					  (void const *) (ADDR),	\
					  (__v16si) (__m512i) (INDEX),	\
					  (__mmask16) 0xFFFF,		\
					  (int) (SCALE)))
756c5857
AI
10608
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gathersiv16sf.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) (__m512) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v16si) (__m512i) (INDEX),	\
					  (__mmask16) (MASK),		\
					  (int) (SCALE)))
756c5857
AI
10615
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)			\
					  _mm512_undefined_pd (),	\
					  (void const *) (ADDR),	\
					  (__v8si) (__m256i) (INDEX),	\
					  (__mmask8) 0xFF,		\
					  (int) (SCALE)))
756c5857
AI
10621
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gathersiv8df.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) (__m512d) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v8si) (__m256i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
756c5857
AI
10628
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   256-bit pass-through vector and all-ones mask 0xFF.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)			\
					  _mm256_undefined_ps (),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) 0xFF,		\
					  (int) (SCALE)))
756c5857
AI
10634
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gatherdiv16sf.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) (__m256) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
756c5857
AI
10641
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)			\
					  _mm512_undefined_pd (),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) 0xFF,		\
					  (int) (SCALE)))
756c5857
AI
10647
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gatherdiv8df.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) (__m512d) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
756c5857
AI
10654
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFFFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)			\
					   _mm512_undefined_epi32 (),	\
					   (void const *) (ADDR),	\
					   (__v16si) (__m512i) (INDEX),	\
					   (__mmask16) 0xFFFF,		\
					   (int) (SCALE)))
756c5857
AI
10661
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gathersiv16si.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si) (__m512i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v16si) (__m512i) (INDEX),	\
					   (__mmask16) (MASK),		\
					   (int) (SCALE)))
756c5857
AI
10668
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)			\
					  _mm512_undefined_epi32 (),	\
					  (void const *) (ADDR),	\
					  (__v8si) (__m256i) (INDEX),	\
					  (__mmask8) 0xFF,		\
					  (int) (SCALE)))
756c5857
AI
10674
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gathersiv8di.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di) (__m512i) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v8si) (__m256i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
10681
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   256-bit pass-through vector and all-ones mask 0xFF.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			\
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)			\
					   _mm256_undefined_si256 (),	\
					   (void const *) (ADDR),	\
					   (__v8di) (__m512i) (INDEX),	\
					   (__mmask8) 0xFF,		\
					   (int) (SCALE)))
756c5857
AI
10687
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gatherdiv16si.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si) (__m256i) (V1OLD),	\
					   (void const *) (ADDR),	\
					   (__v8di) (__m512i) (INDEX),	\
					   (__mmask8) (MASK),		\
					   (int) (SCALE)))
756c5857
AI
10694
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: undefined
   pass-through vector and all-ones mask 0xFF.  The whole expansion is
   parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)			\
					  _mm512_undefined_epi32 (),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) 0xFF,		\
					  (int) (SCALE)))
756c5857
AI
10700
/* Macro form used when __OPTIMIZE__ is not defined.  Masked: V1OLD and
   MASK are forwarded to __builtin_ia32_gatherdiv8di.  The whole expansion
   is parenthesized so the result cast cannot be split from the call by an
   operator applied to the macro's result.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di) (__m512i) (V1OLD),	\
					  (void const *) (ADDR),	\
					  (__v8di) (__m512i) (INDEX),	\
					  (__mmask8) (MASK),		\
					  (int) (SCALE)))
756c5857
AI
10707
/* Macro form, unmasked: calls __builtin_ia32_scattersiv16sf with the
   all-ones mask 0xFFFF.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16sf ((void *) (ADDR),			\
				 (__mmask16) 0xFFFF,			\
				 (__v16si) (__m512i) (INDEX),		\
				 (__v16sf) (__m512) (V1),		\
				 (int) (SCALE))
756c5857
AI
10712
/* Macro form, masked: MASK is cast to __mmask16 and forwarded to
   __builtin_ia32_scattersiv16sf.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv16sf ((void *) (ADDR),			\
				 (__mmask16) (MASK),			\
				 (__v16si) (__m512i) (INDEX),		\
				 (__v16sf) (__m512) (V1),		\
				 (int) (SCALE))
756c5857
AI
10717
/* Macro form, unmasked: calls __builtin_ia32_scattersiv8df with the
   all-ones mask 0xFF.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8df ((void *) (ADDR),			\
				(__mmask8) 0xFF,			\
				(__v8si) (__m256i) (INDEX),		\
				(__v8df) (__m512d) (V1),		\
				(int) (SCALE))
756c5857
AI
10722
/* Macro form, masked: MASK is cast to __mmask8 and forwarded to
   __builtin_ia32_scattersiv8df.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv8df ((void *) (ADDR),			\
				(__mmask8) (MASK),			\
				(__v8si) (__m256i) (INDEX),		\
				(__v8df) (__m512d) (V1),		\
				(int) (SCALE))
756c5857
AI
10727
/* Macro form, unmasked: calls __builtin_ia32_scatterdiv16sf with the
   all-ones mask 0xFF.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR),			\
				 (__mmask8) 0xFF,			\
				 (__v8di) (__m512i) (INDEX),		\
				 (__v8sf) (__m256) (V1),		\
				 (int) (SCALE))
756c5857
AI
10732
/* Macro form, masked: MASK is forwarded to __builtin_ia32_scatterdiv16sf.
   The mask is cast to __mmask8, the type the inline-function version of
   this intrinsic declares for its mask parameter and the type used by the
   other 64-bit-index scatter macros; the previous __mmask16 cast did not
   match.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR),			\
				 (__mmask8) (MASK),			\
				 (__v8di) (__m512i) (INDEX),		\
				 (__v8sf) (__m256) (V1),		\
				 (int) (SCALE))
756c5857
AI
10737
/* Macro form, unmasked: calls __builtin_ia32_scatterdiv8df with the
   all-ones mask 0xFF.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8df ((void *) (ADDR),			\
				(__mmask8) 0xFF,			\
				(__v8di) (__m512i) (INDEX),		\
				(__v8df) (__m512d) (V1),		\
				(int) (SCALE))
756c5857
AI
10742
/* Macro form, masked: MASK is cast to __mmask8 and forwarded to
   __builtin_ia32_scatterdiv8df.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv8df ((void *) (ADDR),			\
				(__mmask8) (MASK),			\
				(__v8di) (__m512i) (INDEX),		\
				(__v8df) (__m512d) (V1),		\
				(int) (SCALE))
756c5857
AI
10747
/* Macro form, unmasked: calls __builtin_ia32_scattersiv16si with the
   all-ones mask 0xFFFF.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16si ((void *) (ADDR),			\
				 (__mmask16) 0xFFFF,			\
				 (__v16si) (__m512i) (INDEX),		\
				 (__v16si) (__m512i) (V1),		\
				 (int) (SCALE))
756c5857
AI
10752
/* Macro form, masked: MASK is cast to __mmask16 and forwarded to
   __builtin_ia32_scattersiv16si.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv16si ((void *) (ADDR),			\
				 (__mmask16) (MASK),			\
				 (__v16si) (__m512i) (INDEX),		\
				 (__v16si) (__m512i) (V1),		\
				 (int) (SCALE))
756c5857
AI
10757
/* Macro form, unmasked: calls __builtin_ia32_scattersiv8di with the
   all-ones mask 0xFF.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8di ((void *) (ADDR),			\
				(__mmask8) 0xFF,			\
				(__v8si) (__m256i) (INDEX),		\
				(__v8di) (__m512i) (V1),		\
				(int) (SCALE))
756c5857
AI
10762
/* Macro form, masked: MASK is cast to __mmask8 and forwarded to
   __builtin_ia32_scattersiv8di.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv8di ((void *) (ADDR),			\
				(__mmask8) (MASK),			\
				(__v8si) (__m256i) (INDEX),		\
				(__v8di) (__m512i) (V1),		\
				(int) (SCALE))
756c5857
AI
10767
/* Macro form, unmasked: calls __builtin_ia32_scatterdiv16si with the
   all-ones mask 0xFF.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16si ((void *) (ADDR),			\
				 (__mmask8) 0xFF,			\
				 (__v8di) (__m512i) (INDEX),		\
				 (__v8si) (__m256i) (V1),		\
				 (int) (SCALE))
756c5857
AI
10772
/* Macro form, masked: MASK is cast to __mmask8 and forwarded to
   __builtin_ia32_scatterdiv16si.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv16si ((void *) (ADDR),			\
				 (__mmask8) (MASK),			\
				 (__v8di) (__m512i) (INDEX),		\
				 (__v8si) (__m256i) (V1),		\
				 (int) (SCALE))
756c5857
AI
10777
/* Macro form, unmasked: calls __builtin_ia32_scatterdiv8di with the
   all-ones mask 0xFF.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8di ((void *) (ADDR),			\
				(__mmask8) 0xFF,			\
				(__v8di) (__m512i) (INDEX),		\
				(__v8di) (__m512i) (V1),		\
				(int) (SCALE))
756c5857
AI
10782
/* Macro form, masked: MASK is cast to __mmask8 and forwarded to
   __builtin_ia32_scatterdiv8di.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv8di ((void *) (ADDR),			\
				(__mmask8) (MASK),			\
				(__v8di) (__m512i) (INDEX),		\
				(__v8di) (__m512i) (V1),		\
				(int) (SCALE))
756c5857
AI
10787#endif
10788
10789extern __inline __m512d
10790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10791_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10792{
10793 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10794 (__v8df) __W,
10795 (__mmask8) __U);
10796}
10797
10798extern __inline __m512d
10799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10800_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10801{
10802 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10803 (__v8df)
10804 _mm512_setzero_pd (),
10805 (__mmask8) __U);
10806}
10807
10808extern __inline void
10809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10810_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10811{
10812 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10813 (__mmask8) __U);
10814}
10815
10816extern __inline __m512
10817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10819{
10820 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10821 (__v16sf) __W,
10822 (__mmask16) __U);
10823}
10824
10825extern __inline __m512
10826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10827_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10828{
10829 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10830 (__v16sf)
10831 _mm512_setzero_ps (),
10832 (__mmask16) __U);
10833}
10834
10835extern __inline void
10836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10837_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10838{
10839 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10840 (__mmask16) __U);
10841}
10842
10843extern __inline __m512i
10844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10845_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10846{
10847 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10848 (__v8di) __W,
10849 (__mmask8) __U);
10850}
10851
10852extern __inline __m512i
10853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10854_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10855{
10856 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10857 (__v8di)
10858 _mm512_setzero_si512 (),
10859 (__mmask8) __U);
10860}
10861
10862extern __inline void
10863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10864_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10865{
10866 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10867 (__mmask8) __U);
10868}
10869
10870extern __inline __m512i
10871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10872_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10873{
10874 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10875 (__v16si) __W,
10876 (__mmask16) __U);
10877}
10878
10879extern __inline __m512i
10880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10881_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10882{
10883 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10884 (__v16si)
10885 _mm512_setzero_si512 (),
10886 (__mmask16) __U);
10887}
10888
10889extern __inline void
10890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10892{
10893 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10894 (__mmask16) __U);
10895}
10896
10897extern __inline __m512d
10898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10899_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10900{
10901 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10902 (__v8df) __W,
10903 (__mmask8) __U);
10904}
10905
10906extern __inline __m512d
10907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10909{
10910 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10911 (__v8df)
10912 _mm512_setzero_pd (),
10913 (__mmask8) __U);
10914}
10915
10916extern __inline __m512d
10917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10918_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10919{
10920 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10921 (__v8df) __W,
10922 (__mmask8) __U);
10923}
10924
10925extern __inline __m512d
10926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10927_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10928{
10929 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10930 (__v8df)
10931 _mm512_setzero_pd (),
10932 (__mmask8) __U);
10933}
10934
10935extern __inline __m512
10936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10937_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10938{
10939 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10940 (__v16sf) __W,
10941 (__mmask16) __U);
10942}
10943
10944extern __inline __m512
10945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10946_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10947{
10948 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10949 (__v16sf)
10950 _mm512_setzero_ps (),
10951 (__mmask16) __U);
10952}
10953
10954extern __inline __m512
10955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10956_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10957{
10958 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10959 (__v16sf) __W,
10960 (__mmask16) __U);
10961}
10962
10963extern __inline __m512
10964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10965_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10966{
10967 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10968 (__v16sf)
10969 _mm512_setzero_ps (),
10970 (__mmask16) __U);
10971}
10972
10973extern __inline __m512i
10974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10975_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10976{
10977 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10978 (__v8di) __W,
10979 (__mmask8) __U);
10980}
10981
10982extern __inline __m512i
10983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10985{
10986 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10987 (__v8di)
10988 _mm512_setzero_si512 (),
10989 (__mmask8) __U);
10990}
10991
10992extern __inline __m512i
10993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10994_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10995{
10996 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10997 (__v8di) __W,
10998 (__mmask8) __U);
10999}
11000
11001extern __inline __m512i
11002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11003_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
11004{
11005 return (__m512i)
11006 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
11007 (__v8di)
11008 _mm512_setzero_si512 (),
11009 (__mmask8) __U);
11010}
11011
11012extern __inline __m512i
11013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11014_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
11015{
11016 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
11017 (__v16si) __W,
11018 (__mmask16) __U);
11019}
11020
11021extern __inline __m512i
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
11024{
11025 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
11026 (__v16si)
11027 _mm512_setzero_si512 (),
11028 (__mmask16) __U);
11029}
11030
11031extern __inline __m512i
11032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11033_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
11034{
11035 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
11036 (__v16si) __W,
11037 (__mmask16) __U);
11038}
11039
11040extern __inline __m512i
11041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11042_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
11043{
11044 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
11045 (__v16si)
11046 _mm512_setzero_si512
11047 (), (__mmask16) __U);
11048}
11049
11050/* Mask arithmetic operations */
6901ea62
AS
/* The _k*_mask16 names are plain aliases for the _mm512_k* functions.  */
#define _kand_mask16	_mm512_kand
#define _kandn_mask16	_mm512_kandn
#define _knot_mask16	_mm512_knot
#define _kor_mask16	_mm512_kor
#define _kxnor_mask16	_mm512_kxnor
#define _kxor_mask16	_mm512_kxor
11057
dea06111
AS
11058extern __inline unsigned char
11059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11060_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
11061{
11062 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
11063 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
11064}
11065
11066extern __inline unsigned char
11067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
11069{
11070 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
11071 (__mmask16) __B);
11072}
11073
11074extern __inline unsigned char
11075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11076_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
11077{
11078 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
11079 (__mmask16) __B);
11080}
11081
7cdb6e4c
AS
11082extern __inline unsigned int
11083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11084_cvtmask16_u32 (__mmask16 __A)
11085{
11086 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
11087}
11088
11089extern __inline __mmask16
11090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091_cvtu32_mask16 (unsigned int __A)
11092{
11093 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
11094}
11095
11096extern __inline __mmask16
11097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11098_load_mask16 (__mmask16 *__A)
11099{
11100 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
11101}
11102
11103extern __inline void
11104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11105_store_mask16 (__mmask16 *__A, __mmask16 __B)
11106{
11107 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
11108}
11109
756c5857
AI
11110extern __inline __mmask16
11111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11112_mm512_kand (__mmask16 __A, __mmask16 __B)
11113{
11114 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
11115}
11116
11117extern __inline __mmask16
11118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11119_mm512_kandn (__mmask16 __A, __mmask16 __B)
11120{
6901ea62
AS
11121 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
11122 (__mmask16) __B);
756c5857
AI
11123}
11124
11125extern __inline __mmask16
11126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11127_mm512_kor (__mmask16 __A, __mmask16 __B)
11128{
11129 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
11130}
11131
11132extern __inline int
11133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11134_mm512_kortestz (__mmask16 __A, __mmask16 __B)
11135{
11136 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
11137 (__mmask16) __B);
11138}
11139
11140extern __inline int
11141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11142_mm512_kortestc (__mmask16 __A, __mmask16 __B)
11143{
11144 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
11145 (__mmask16) __B);
11146}
11147
11148extern __inline __mmask16
11149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11150_mm512_kxnor (__mmask16 __A, __mmask16 __B)
11151{
11152 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
11153}
11154
11155extern __inline __mmask16
11156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11157_mm512_kxor (__mmask16 __A, __mmask16 __B)
11158{
11159 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
11160}
11161
11162extern __inline __mmask16
11163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11164_mm512_knot (__mmask16 __A)
11165{
11166 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
11167}
11168
11169extern __inline __mmask16
11170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11171_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
11172{
11173 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11174}
11175
6901ea62
AS
11176extern __inline __mmask16
11177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11178_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
11179{
11180 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11181}
11182
756c5857
AI
11183#ifdef __OPTIMIZE__
11184extern __inline __m512i
11185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11186_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
11187 const int __imm)
11188{
11189 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
11190 (__v4si) __D,
11191 __imm,
11192 (__v16si)
11193 _mm512_setzero_si512 (),
11194 __B);
11195}
11196
11197extern __inline __m512
11198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
11200 const int __imm)
11201{
11202 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
11203 (__v4sf) __D,
11204 __imm,
11205 (__v16sf)
11206 _mm512_setzero_ps (), __B);
11207}
11208
11209extern __inline __m512i
11210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11211_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
11212 __m128i __D, const int __imm)
11213{
11214 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
11215 (__v4si) __D,
11216 __imm,
11217 (__v16si) __A,
11218 __B);
11219}
11220
11221extern __inline __m512
11222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11223_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
11224 __m128 __D, const int __imm)
11225{
11226 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
11227 (__v4sf) __D,
11228 __imm,
11229 (__v16sf) __A, __B);
11230}
11231#else
/* Macro forms of the 128-bit insert intrinsics for the #else branch of
   the surrounding __OPTIMIZE__ test.  Each one casts its operands and
   forwards them to the matching *_mask builtin; the maskz forms pass a
   zeroed vector as the fourth operand, the mask forms pass A.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
                                             (__v4sf)(__m128) (Y),      \
                                             (int) (C),                 \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
                                              (__v4si)(__m128i) (Y),    \
                                              (int) (C),                \
                                              (__v16si)_mm512_setzero_si512 (), \
                                              (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
                                             (__v4sf)(__m128) (Y),      \
                                             (int) (C),                 \
                                             (__v16sf)(__m512) (A),     \
                                             (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
                                              (__v4si)(__m128i) (Y),    \
                                              (int) (C),                \
                                              (__v16si)(__m512i) (A),   \
                                              (__mmask16)(B)))
756c5857
AI
11251#endif
11252
11253extern __inline __m512i
11254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11255_mm512_max_epi64 (__m512i __A, __m512i __B)
11256{
11257 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11258 (__v8di) __B,
11259 (__v8di)
4271e5cb 11260 _mm512_undefined_epi32 (),
756c5857
AI
11261 (__mmask8) -1);
11262}
11263
11264extern __inline __m512i
11265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11266_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11267{
11268 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11269 (__v8di) __B,
11270 (__v8di)
11271 _mm512_setzero_si512 (),
11272 __M);
11273}
11274
11275extern __inline __m512i
11276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11277_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11278{
11279 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11280 (__v8di) __B,
11281 (__v8di) __W, __M);
11282}
11283
11284extern __inline __m512i
11285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11286_mm512_min_epi64 (__m512i __A, __m512i __B)
11287{
11288 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11289 (__v8di) __B,
11290 (__v8di)
4271e5cb 11291 _mm512_undefined_epi32 (),
756c5857
AI
11292 (__mmask8) -1);
11293}
11294
11295extern __inline __m512i
11296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11297_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11298{
11299 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11300 (__v8di) __B,
11301 (__v8di) __W, __M);
11302}
11303
11304extern __inline __m512i
11305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11306_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11307{
11308 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11309 (__v8di) __B,
11310 (__v8di)
11311 _mm512_setzero_si512 (),
11312 __M);
11313}
11314
11315extern __inline __m512i
11316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11317_mm512_max_epu64 (__m512i __A, __m512i __B)
11318{
11319 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11320 (__v8di) __B,
11321 (__v8di)
4271e5cb 11322 _mm512_undefined_epi32 (),
756c5857
AI
11323 (__mmask8) -1);
11324}
11325
11326extern __inline __m512i
11327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11328_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11329{
11330 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11331 (__v8di) __B,
11332 (__v8di)
11333 _mm512_setzero_si512 (),
11334 __M);
11335}
11336
11337extern __inline __m512i
11338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11339_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11340{
11341 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11342 (__v8di) __B,
11343 (__v8di) __W, __M);
11344}
11345
11346extern __inline __m512i
11347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11348_mm512_min_epu64 (__m512i __A, __m512i __B)
11349{
11350 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11351 (__v8di) __B,
11352 (__v8di)
4271e5cb 11353 _mm512_undefined_epi32 (),
756c5857
AI
11354 (__mmask8) -1);
11355}
11356
11357extern __inline __m512i
11358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11359_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11360{
11361 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11362 (__v8di) __B,
11363 (__v8di) __W, __M);
11364}
11365
11366extern __inline __m512i
11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11369{
11370 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11371 (__v8di) __B,
11372 (__v8di)
11373 _mm512_setzero_si512 (),
11374 __M);
11375}
11376
11377extern __inline __m512i
11378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379_mm512_max_epi32 (__m512i __A, __m512i __B)
11380{
11381 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11382 (__v16si) __B,
11383 (__v16si)
4271e5cb 11384 _mm512_undefined_epi32 (),
756c5857
AI
11385 (__mmask16) -1);
11386}
11387
11388extern __inline __m512i
11389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11391{
11392 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11393 (__v16si) __B,
11394 (__v16si)
11395 _mm512_setzero_si512 (),
11396 __M);
11397}
11398
11399extern __inline __m512i
11400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11402{
11403 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11404 (__v16si) __B,
11405 (__v16si) __W, __M);
11406}
11407
11408extern __inline __m512i
11409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410_mm512_min_epi32 (__m512i __A, __m512i __B)
11411{
11412 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11413 (__v16si) __B,
11414 (__v16si)
4271e5cb 11415 _mm512_undefined_epi32 (),
756c5857
AI
11416 (__mmask16) -1);
11417}
11418
11419extern __inline __m512i
11420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11422{
11423 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11424 (__v16si) __B,
11425 (__v16si)
11426 _mm512_setzero_si512 (),
11427 __M);
11428}
11429
11430extern __inline __m512i
11431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11432_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11433{
11434 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11435 (__v16si) __B,
11436 (__v16si) __W, __M);
11437}
11438
11439extern __inline __m512i
11440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11441_mm512_max_epu32 (__m512i __A, __m512i __B)
11442{
11443 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11444 (__v16si) __B,
11445 (__v16si)
4271e5cb 11446 _mm512_undefined_epi32 (),
756c5857
AI
11447 (__mmask16) -1);
11448}
11449
11450extern __inline __m512i
11451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11453{
11454 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11455 (__v16si) __B,
11456 (__v16si)
11457 _mm512_setzero_si512 (),
11458 __M);
11459}
11460
11461extern __inline __m512i
11462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11463_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11464{
11465 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11466 (__v16si) __B,
11467 (__v16si) __W, __M);
11468}
11469
11470extern __inline __m512i
11471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472_mm512_min_epu32 (__m512i __A, __m512i __B)
11473{
11474 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11475 (__v16si) __B,
11476 (__v16si)
4271e5cb 11477 _mm512_undefined_epi32 (),
756c5857
AI
11478 (__mmask16) -1);
11479}
11480
11481extern __inline __m512i
11482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11483_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11484{
11485 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11486 (__v16si) __B,
11487 (__v16si)
11488 _mm512_setzero_si512 (),
11489 __M);
11490}
11491
11492extern __inline __m512i
11493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11494_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11495{
11496 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11497 (__v16si) __B,
11498 (__v16si) __W, __M);
11499}
11500
11501extern __inline __m512
11502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11503_mm512_unpacklo_ps (__m512 __A, __m512 __B)
11504{
11505 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11506 (__v16sf) __B,
11507 (__v16sf)
0b192937 11508 _mm512_undefined_ps (),
756c5857
AI
11509 (__mmask16) -1);
11510}
11511
11512extern __inline __m512
11513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11514_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11515{
11516 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11517 (__v16sf) __B,
11518 (__v16sf) __W,
11519 (__mmask16) __U);
11520}
11521
11522extern __inline __m512
11523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11524_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11525{
11526 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11527 (__v16sf) __B,
11528 (__v16sf)
11529 _mm512_setzero_ps (),
11530 (__mmask16) __U);
11531}
11532
075691af
AI
11533#ifdef __OPTIMIZE__
11534extern __inline __m128d
11535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11536_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
11537{
11538 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
11539 (__v2df) __B,
11540 __R);
11541}
11542
f4ee3a9e
UB
11543extern __inline __m128d
11544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11546 __m128d __B, const int __R)
11547{
11548 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11549 (__v2df) __B,
11550 (__v2df) __W,
11551 (__mmask8) __U, __R);
11552}
11553
11554extern __inline __m128d
11555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11557 const int __R)
11558{
11559 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11560 (__v2df) __B,
11561 (__v2df)
11562 _mm_setzero_pd (),
11563 (__mmask8) __U, __R);
11564}
11565
075691af
AI
11566extern __inline __m128
11567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11568_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
11569{
11570 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
11571 (__v4sf) __B,
11572 __R);
11573}
11574
f4ee3a9e
UB
11575extern __inline __m128
11576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11578 __m128 __B, const int __R)
11579{
11580 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11581 (__v4sf) __B,
11582 (__v4sf) __W,
11583 (__mmask8) __U, __R);
11584}
11585
11586extern __inline __m128
11587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11588_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11589 const int __R)
11590{
11591 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11592 (__v4sf) __B,
11593 (__v4sf)
11594 _mm_setzero_ps (),
11595 (__mmask8) __U, __R);
11596}
11597
075691af
AI
11598extern __inline __m128d
11599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
11601{
11602 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
11603 (__v2df) __B,
11604 __R);
11605}
11606
f4ee3a9e
UB
11607extern __inline __m128d
11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11610 __m128d __B, const int __R)
11611{
11612 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11613 (__v2df) __B,
11614 (__v2df) __W,
11615 (__mmask8) __U, __R);
11616}
11617
11618extern __inline __m128d
11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11621 const int __R)
11622{
11623 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11624 (__v2df) __B,
11625 (__v2df)
11626 _mm_setzero_pd (),
11627 (__mmask8) __U, __R);
11628}
11629
075691af
AI
11630extern __inline __m128
11631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
11633{
11634 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
11635 (__v4sf) __B,
11636 __R);
11637}
11638
f4ee3a9e
UB
11639extern __inline __m128
11640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11641_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11642 __m128 __B, const int __R)
11643{
11644 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11645 (__v4sf) __B,
11646 (__v4sf) __W,
11647 (__mmask8) __U, __R);
11648}
11649
11650extern __inline __m128
11651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11653 const int __R)
11654{
11655 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11656 (__v4sf) __B,
11657 (__v4sf)
11658 _mm_setzero_ps (),
11659 (__mmask8) __U, __R);
11660}
11661
075691af
AI
11662#else
/* Macro forms of the scalar max/min rounding intrinsics for the #else
   branch of the surrounding __OPTIMIZE__ test.  Each forwards its
   arguments to the same builtin as the inline function of that name.

   Every expansion is wrapped in parentheses so that the result cast
   covers the whole builtin call.  Without them a postfix operator
   written after the macro (a subscript, for instance) would bind to the
   call before the cast, and the macro form would not behave like the
   inline form.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round ((A), (B), (C)))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round ((A), (B), (C)))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round ((A), (B), (C)))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round ((A), (B), (C)))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))
11698
075691af
AI
11699#endif
11700
756c5857
AI
11701extern __inline __m512d
11702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11703_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11704{
11705 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11706 (__v8df) __W,
11707 (__mmask8) __U);
11708}
11709
11710extern __inline __m512
11711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11712_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11713{
11714 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11715 (__v16sf) __W,
11716 (__mmask16) __U);
11717}
11718
11719extern __inline __m512i
11720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11722{
11723 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11724 (__v8di) __W,
11725 (__mmask8) __U);
11726}
11727
11728extern __inline __m512i
11729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11730_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11731{
11732 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11733 (__v16si) __W,
11734 (__mmask16) __U);
11735}
11736
075691af
AI
11737#ifdef __OPTIMIZE__
11738extern __inline __m128d
11739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11740_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11741{
11742 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11743 (__v2df) __A,
11744 (__v2df) __B,
11745 __R);
11746}
11747
11748extern __inline __m128
11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11751{
11752 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11753 (__v4sf) __A,
11754 (__v4sf) __B,
11755 __R);
11756}
11757
11758extern __inline __m128d
11759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11760_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11761{
11762 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11763 (__v2df) __A,
11764 -(__v2df) __B,
11765 __R);
11766}
11767
11768extern __inline __m128
11769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11770_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11771{
11772 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11773 (__v4sf) __A,
11774 -(__v4sf) __B,
11775 __R);
11776}
11777
11778extern __inline __m128d
11779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11780_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11781{
11782 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11783 -(__v2df) __A,
11784 (__v2df) __B,
11785 __R);
11786}
11787
11788extern __inline __m128
11789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11790_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11791{
11792 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11793 -(__v4sf) __A,
11794 (__v4sf) __B,
11795 __R);
11796}
11797
11798extern __inline __m128d
11799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11801{
11802 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11803 -(__v2df) __A,
11804 -(__v2df) __B,
11805 __R);
11806}
11807
11808extern __inline __m128
11809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11810_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11811{
11812 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11813 -(__v4sf) __A,
11814 -(__v4sf) __B,
11815 __R);
11816}
11817#else
/* Macro forms of the scalar FMA rounding intrinsics for the #else branch
   of the surrounding __OPTIMIZE__ test.  All eight expand to the
   vfmadd{sd,ss}3_round builtin; the fmsub/fnmadd/fnmsub forms negate the
   third and/or second operand first, as the inline functions do.

   Every expansion is wrapped in parentheses so that the result cast
   covers the whole builtin call.  Without them a postfix operator
   written after the macro would bind to the call before the cast.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), (C), (R)))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), (C), (R)))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), -(C), (R)))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), -(C), (R)))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), (C), (R)))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), (C), (R)))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), -(C), (R)))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), -(C), (R)))
11841#endif
11842
5c4ade6d
JJ
11843extern __inline __m128d
11844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11845_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11846{
11847 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11848 (__v2df) __A,
11849 (__v2df) __B,
11850 (__mmask8) __U,
11851 _MM_FROUND_CUR_DIRECTION);
11852}
11853
11854extern __inline __m128
11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11857{
11858 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11859 (__v4sf) __A,
11860 (__v4sf) __B,
11861 (__mmask8) __U,
11862 _MM_FROUND_CUR_DIRECTION);
11863}
11864
11865extern __inline __m128d
11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11868{
11869 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11870 (__v2df) __A,
11871 (__v2df) __B,
11872 (__mmask8) __U,
11873 _MM_FROUND_CUR_DIRECTION);
11874}
11875
11876extern __inline __m128
11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11879{
11880 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11881 (__v4sf) __A,
11882 (__v4sf) __B,
11883 (__mmask8) __U,
11884 _MM_FROUND_CUR_DIRECTION);
11885}
11886
11887extern __inline __m128d
11888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11889_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11890{
11891 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11892 (__v2df) __A,
11893 (__v2df) __B,
11894 (__mmask8) __U,
11895 _MM_FROUND_CUR_DIRECTION);
11896}
11897
11898extern __inline __m128
11899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11900_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11901{
11902 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11903 (__v4sf) __A,
11904 (__v4sf) __B,
11905 (__mmask8) __U,
11906 _MM_FROUND_CUR_DIRECTION);
11907}
11908
11909extern __inline __m128d
11910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11911_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11912{
11913 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11914 (__v2df) __A,
11915 -(__v2df) __B,
11916 (__mmask8) __U,
11917 _MM_FROUND_CUR_DIRECTION);
11918}
11919
11920extern __inline __m128
11921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11922_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11923{
11924 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11925 (__v4sf) __A,
11926 -(__v4sf) __B,
11927 (__mmask8) __U,
11928 _MM_FROUND_CUR_DIRECTION);
11929}
11930
11931extern __inline __m128d
11932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11933_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11934{
11935 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11936 (__v2df) __A,
11937 (__v2df) __B,
11938 (__mmask8) __U,
11939 _MM_FROUND_CUR_DIRECTION);
11940}
11941
11942extern __inline __m128
11943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11944_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11945{
11946 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
11947 (__v4sf) __A,
11948 (__v4sf) __B,
11949 (__mmask8) __U,
11950 _MM_FROUND_CUR_DIRECTION);
11951}
11952
11953extern __inline __m128d
11954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11956{
11957 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11958 (__v2df) __A,
11959 -(__v2df) __B,
11960 (__mmask8) __U,
11961 _MM_FROUND_CUR_DIRECTION);
11962}
11963
11964extern __inline __m128
11965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11966_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11967{
11968 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11969 (__v4sf) __A,
11970 -(__v4sf) __B,
11971 (__mmask8) __U,
11972 _MM_FROUND_CUR_DIRECTION);
11973}
11974
11975extern __inline __m128d
11976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11977_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11978{
11979 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11980 -(__v2df) __A,
11981 (__v2df) __B,
11982 (__mmask8) __U,
11983 _MM_FROUND_CUR_DIRECTION);
11984}
11985
11986extern __inline __m128
11987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11988_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11989{
11990 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11991 -(__v4sf) __A,
11992 (__v4sf) __B,
11993 (__mmask8) __U,
11994 _MM_FROUND_CUR_DIRECTION);
11995}
11996
11997extern __inline __m128d
11998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
12000{
12001 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12002 -(__v2df) __A,
12003 (__v2df) __B,
12004 (__mmask8) __U,
12005 _MM_FROUND_CUR_DIRECTION);
12006}
12007
12008extern __inline __m128
12009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
12011{
12012 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12013 -(__v4sf) __A,
12014 (__v4sf) __B,
12015 (__mmask8) __U,
12016 _MM_FROUND_CUR_DIRECTION);
12017}
12018
12019extern __inline __m128d
12020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
12022{
12023 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12024 -(__v2df) __A,
12025 (__v2df) __B,
12026 (__mmask8) __U,
12027 _MM_FROUND_CUR_DIRECTION);
12028}
12029
12030extern __inline __m128
12031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12032_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
12033{
12034 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12035 -(__v4sf) __A,
12036 (__v4sf) __B,
12037 (__mmask8) __U,
12038 _MM_FROUND_CUR_DIRECTION);
12039}
12040
12041extern __inline __m128d
12042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12043_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12044{
12045 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12046 -(__v2df) __A,
12047 -(__v2df) __B,
12048 (__mmask8) __U,
12049 _MM_FROUND_CUR_DIRECTION);
12050}
12051
12052extern __inline __m128
12053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12055{
12056 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12057 -(__v4sf) __A,
12058 -(__v4sf) __B,
12059 (__mmask8) __U,
12060 _MM_FROUND_CUR_DIRECTION);
12061}
12062
12063extern __inline __m128d
12064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12065_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
12066{
12067 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12068 -(__v2df) __A,
12069 (__v2df) __B,
12070 (__mmask8) __U,
12071 _MM_FROUND_CUR_DIRECTION);
12072}
12073
12074extern __inline __m128
12075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12076_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
12077{
12078 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12079 -(__v4sf) __A,
12080 (__v4sf) __B,
12081 (__mmask8) __U,
12082 _MM_FROUND_CUR_DIRECTION);
12083}
12084
12085extern __inline __m128d
12086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12087_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
12088{
12089 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12090 -(__v2df) __A,
12091 -(__v2df) __B,
12092 (__mmask8) __U,
12093 _MM_FROUND_CUR_DIRECTION);
12094}
12095
12096extern __inline __m128
12097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
12099{
12100 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12101 -(__v4sf) __A,
12102 -(__v4sf) __B,
12103 (__mmask8) __U,
12104 _MM_FROUND_CUR_DIRECTION);
12105}
12106
12107#ifdef __OPTIMIZE__
12108extern __inline __m128d
12109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12110_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12111 const int __R)
12112{
12113 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12114 (__v2df) __A,
12115 (__v2df) __B,
12116 (__mmask8) __U, __R);
12117}
12118
12119extern __inline __m128
12120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12122 const int __R)
12123{
12124 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12125 (__v4sf) __A,
12126 (__v4sf) __B,
12127 (__mmask8) __U, __R);
12128}
12129
12130extern __inline __m128d
12131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12132_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12133 const int __R)
12134{
12135 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12136 (__v2df) __A,
12137 (__v2df) __B,
12138 (__mmask8) __U, __R);
12139}
12140
12141extern __inline __m128
12142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12143_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12144 const int __R)
12145{
12146 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12147 (__v4sf) __A,
12148 (__v4sf) __B,
12149 (__mmask8) __U, __R);
12150}
12151
12152extern __inline __m128d
12153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12155 const int __R)
12156{
12157 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12158 (__v2df) __A,
12159 (__v2df) __B,
12160 (__mmask8) __U, __R);
12161}
12162
12163extern __inline __m128
12164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12165_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12166 const int __R)
12167{
12168 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12169 (__v4sf) __A,
12170 (__v4sf) __B,
12171 (__mmask8) __U, __R);
12172}
12173
12174extern __inline __m128d
12175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12176_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12177 const int __R)
12178{
12179 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12180 (__v2df) __A,
12181 -(__v2df) __B,
12182 (__mmask8) __U, __R);
12183}
12184
12185extern __inline __m128
12186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12188 const int __R)
12189{
12190 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12191 (__v4sf) __A,
12192 -(__v4sf) __B,
12193 (__mmask8) __U, __R);
12194}
12195
12196extern __inline __m128d
12197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12198_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12199 const int __R)
12200{
12201 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12202 (__v2df) __A,
12203 (__v2df) __B,
12204 (__mmask8) __U, __R);
12205}
12206
12207extern __inline __m128
12208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12209_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12210 const int __R)
12211{
12212 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12213 (__v4sf) __A,
12214 (__v4sf) __B,
12215 (__mmask8) __U, __R);
12216}
12217
12218extern __inline __m128d
12219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12220_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12221 const int __R)
12222{
12223 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12224 (__v2df) __A,
12225 -(__v2df) __B,
12226 (__mmask8) __U, __R);
12227}
12228
12229extern __inline __m128
12230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12231_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12232 const int __R)
12233{
12234 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12235 (__v4sf) __A,
12236 -(__v4sf) __B,
12237 (__mmask8) __U, __R);
12238}
12239
12240extern __inline __m128d
12241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12242_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12243 const int __R)
12244{
12245 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12246 -(__v2df) __A,
12247 (__v2df) __B,
12248 (__mmask8) __U, __R);
12249}
12250
12251extern __inline __m128
12252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12254 const int __R)
12255{
12256 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12257 -(__v4sf) __A,
12258 (__v4sf) __B,
12259 (__mmask8) __U, __R);
12260}
12261
12262extern __inline __m128d
12263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12265 const int __R)
12266{
12267 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12268 -(__v2df) __A,
12269 (__v2df) __B,
12270 (__mmask8) __U, __R);
12271}
12272
12273extern __inline __m128
12274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12275_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12276 const int __R)
12277{
12278 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12279 -(__v4sf) __A,
12280 (__v4sf) __B,
12281 (__mmask8) __U, __R);
12282}
12283
12284extern __inline __m128d
12285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12286_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12287 const int __R)
12288{
12289 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12290 -(__v2df) __A,
12291 (__v2df) __B,
12292 (__mmask8) __U, __R);
12293}
12294
12295extern __inline __m128
12296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12297_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12298 const int __R)
12299{
12300 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12301 -(__v4sf) __A,
12302 (__v4sf) __B,
12303 (__mmask8) __U, __R);
12304}
12305
12306extern __inline __m128d
12307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12308_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12309 const int __R)
12310{
12311 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12312 -(__v2df) __A,
12313 -(__v2df) __B,
12314 (__mmask8) __U, __R);
12315}
12316
12317extern __inline __m128
12318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12320 const int __R)
12321{
12322 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12323 -(__v4sf) __A,
12324 -(__v4sf) __B,
12325 (__mmask8) __U, __R);
12326}
12327
12328extern __inline __m128d
12329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12330_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12331 const int __R)
12332{
12333 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12334 -(__v2df) __A,
12335 (__v2df) __B,
12336 (__mmask8) __U, __R);
12337}
12338
12339extern __inline __m128
12340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12341_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12342 const int __R)
12343{
12344 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12345 -(__v4sf) __A,
12346 (__v4sf) __B,
12347 (__mmask8) __U, __R);
12348}
12349
12350extern __inline __m128d
12351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12352_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12353 const int __R)
12354{
12355 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12356 -(__v2df) __A,
12357 -(__v2df) __B,
12358 (__mmask8) __U, __R);
12359}
12360
12361extern __inline __m128
12362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12363_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12364 const int __R)
12365{
12366 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12367 -(__v4sf) __A,
12368 -(__v4sf) __B,
12369 (__mmask8) __U, __R);
12370}
12371#else
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask_fmadd_round_sd(A, U, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R))
12374
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask_fmadd_round_ss(A, U, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R))
12377
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fmadd_round_sd(A, B, C, U, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R))
12380
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fmadd_round_ss(A, B, C, U, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R))
12383
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R))
5c4ade6d
JJ
12386
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R))
5c4ade6d
JJ
12389
/* Non-__OPTIMIZE__ macro form; C is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask_fmsub_round_sd(A, U, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R))
12392
/* Non-__OPTIMIZE__ macro form; C is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask_fmsub_round_ss(A, U, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R))
12395
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fmsub_round_sd(A, B, C, U, R)            \
    ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R))
12398
/* Non-__OPTIMIZE__ macro form.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fmsub_round_ss(A, B, C, U, R)            \
    ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R))
12401
/* Non-__OPTIMIZE__ macro form; C is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R))
5c4ade6d
JJ
12404
/* Non-__OPTIMIZE__ macro form; C is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R))
5c4ade6d
JJ
12407
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask_fnmadd_round_sd(A, U, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R))
12410
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask_fnmadd_round_ss(A, U, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R))
12413
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R))
12416
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R))
12419
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R))
5c4ade6d
JJ
12422
/* Non-__OPTIMIZE__ macro form; B is negated before the fused multiply-add
   builtin.  Outer parentheses keep the result cast from being re-associated
   with operators at the use site.  */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R))
5c4ade6d
JJ
12425
/* Non-__OPTIMIZE__ macro form; B and C are both negated before the fused
   multiply-add builtin.  Outer parentheses keep the result cast from being
   re-associated with operators at the use site.  */
#define _mm_mask_fnmsub_round_sd(A, U, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R))
12428
/* Non-__OPTIMIZE__ macro form; B and C are both negated before the fused
   multiply-add builtin.  Outer parentheses keep the result cast from being
   re-associated with operators at the use site.  */
#define _mm_mask_fnmsub_round_ss(A, U, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R))
12431
/* Non-__OPTIMIZE__ macro form; B is negated before the fused
   multiply-subtract builtin.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R)            \
    ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R))
12434
/* Non-__OPTIMIZE__ macro form; B is negated before the fused
   multiply-subtract builtin.  Outer parentheses keep the result cast from
   being re-associated with operators at the use site.  */
#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R)            \
    ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R))
12437
/* Non-__OPTIMIZE__ macro form; B and C are both negated before the fused
   multiply-add builtin.  Outer parentheses keep the result cast from being
   re-associated with operators at the use site.  */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R))
5c4ade6d
JJ
12440
/* Non-__OPTIMIZE__ macro form; B and C are both negated before the fused
   multiply-add builtin.  Outer parentheses keep the result cast from being
   re-associated with operators at the use site.  */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R))
5c4ade6d
JJ
12443#endif
12444
756c5857
AI
12445#ifdef __OPTIMIZE__
12446extern __inline int
12447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12448_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
12449{
12450 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
12451}
12452
12453extern __inline int
12454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12455_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
12456{
12457 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
12458}
12459#else
/* Non-__OPTIMIZE__ macro form: passes all four arguments straight through
   to __builtin_ia32_vcomiss.  */
#define _mm_comi_round_ss(A, B, P, R) \
  __builtin_ia32_vcomiss (A, B, P, R)
/* Non-__OPTIMIZE__ macro form: passes all four arguments straight through
   to __builtin_ia32_vcomisd.  */
#define _mm_comi_round_sd(A, B, P, R) \
  __builtin_ia32_vcomisd (A, B, P, R)
12464#endif
12465
12466extern __inline __m512d
12467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12468_mm512_sqrt_pd (__m512d __A)
12469{
12470 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12471 (__v8df)
0b192937 12472 _mm512_undefined_pd (),
756c5857
AI
12473 (__mmask8) -1,
12474 _MM_FROUND_CUR_DIRECTION);
12475}
12476
12477extern __inline __m512d
12478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12479_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
12480{
12481 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12482 (__v8df) __W,
12483 (__mmask8) __U,
12484 _MM_FROUND_CUR_DIRECTION);
12485}
12486
12487extern __inline __m512d
12488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12489_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
12490{
12491 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12492 (__v8df)
12493 _mm512_setzero_pd (),
12494 (__mmask8) __U,
12495 _MM_FROUND_CUR_DIRECTION);
12496}
12497
12498extern __inline __m512
12499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12500_mm512_sqrt_ps (__m512 __A)
12501{
12502 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12503 (__v16sf)
0b192937 12504 _mm512_undefined_ps (),
756c5857
AI
12505 (__mmask16) -1,
12506 _MM_FROUND_CUR_DIRECTION);
12507}
12508
12509extern __inline __m512
12510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12511_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
12512{
12513 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12514 (__v16sf) __W,
12515 (__mmask16) __U,
12516 _MM_FROUND_CUR_DIRECTION);
12517}
12518
12519extern __inline __m512
12520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12521_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
12522{
12523 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12524 (__v16sf)
12525 _mm512_setzero_ps (),
12526 (__mmask16) __U,
12527 _MM_FROUND_CUR_DIRECTION);
12528}
12529
12530extern __inline __m512d
12531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12532_mm512_add_pd (__m512d __A, __m512d __B)
12533{
2069d6fc 12534 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
12535}
12536
12537extern __inline __m512d
12538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12539_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12540{
12541 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12542 (__v8df) __B,
12543 (__v8df) __W,
12544 (__mmask8) __U,
12545 _MM_FROUND_CUR_DIRECTION);
12546}
12547
12548extern __inline __m512d
12549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12550_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
12551{
12552 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12553 (__v8df) __B,
12554 (__v8df)
12555 _mm512_setzero_pd (),
12556 (__mmask8) __U,
12557 _MM_FROUND_CUR_DIRECTION);
12558}
12559
12560extern __inline __m512
12561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12562_mm512_add_ps (__m512 __A, __m512 __B)
12563{
2069d6fc 12564 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
12565}
12566
12567extern __inline __m512
12568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12569_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12570{
12571 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12572 (__v16sf) __B,
12573 (__v16sf) __W,
12574 (__mmask16) __U,
12575 _MM_FROUND_CUR_DIRECTION);
12576}
12577
12578extern __inline __m512
12579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12580_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
12581{
12582 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12583 (__v16sf) __B,
12584 (__v16sf)
12585 _mm512_setzero_ps (),
12586 (__mmask16) __U,
12587 _MM_FROUND_CUR_DIRECTION);
12588}
12589
1853f5c7
SP
12590extern __inline __m128d
12591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12592_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12593{
12594 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12595 (__v2df) __B,
12596 (__v2df) __W,
12597 (__mmask8) __U,
12598 _MM_FROUND_CUR_DIRECTION);
12599}
12600
12601extern __inline __m128d
12602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12603_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
12604{
12605 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12606 (__v2df) __B,
12607 (__v2df)
12608 _mm_setzero_pd (),
12609 (__mmask8) __U,
12610 _MM_FROUND_CUR_DIRECTION);
12611}
12612
12613extern __inline __m128
12614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12616{
12617 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12618 (__v4sf) __B,
12619 (__v4sf) __W,
12620 (__mmask8) __U,
12621 _MM_FROUND_CUR_DIRECTION);
12622}
12623
12624extern __inline __m128
12625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12626_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
12627{
12628 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12629 (__v4sf) __B,
12630 (__v4sf)
12631 _mm_setzero_ps (),
12632 (__mmask8) __U,
12633 _MM_FROUND_CUR_DIRECTION);
12634}
12635
756c5857
AI
12636extern __inline __m512d
12637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12638_mm512_sub_pd (__m512d __A, __m512d __B)
12639{
2069d6fc 12640 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
12641}
12642
12643extern __inline __m512d
12644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12645_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12646{
12647 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12648 (__v8df) __B,
12649 (__v8df) __W,
12650 (__mmask8) __U,
12651 _MM_FROUND_CUR_DIRECTION);
12652}
12653
12654extern __inline __m512d
12655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12656_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
12657{
12658 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12659 (__v8df) __B,
12660 (__v8df)
12661 _mm512_setzero_pd (),
12662 (__mmask8) __U,
12663 _MM_FROUND_CUR_DIRECTION);
12664}
12665
12666extern __inline __m512
12667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12668_mm512_sub_ps (__m512 __A, __m512 __B)
12669{
2069d6fc 12670 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
12671}
12672
12673extern __inline __m512
12674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12675_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12676{
12677 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12678 (__v16sf) __B,
12679 (__v16sf) __W,
12680 (__mmask16) __U,
12681 _MM_FROUND_CUR_DIRECTION);
12682}
12683
12684extern __inline __m512
12685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12686_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
12687{
12688 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12689 (__v16sf) __B,
12690 (__v16sf)
12691 _mm512_setzero_ps (),
12692 (__mmask16) __U,
12693 _MM_FROUND_CUR_DIRECTION);
12694}
12695
1853f5c7
SP
12696extern __inline __m128d
12697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12698_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12699{
12700 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12701 (__v2df) __B,
12702 (__v2df) __W,
12703 (__mmask8) __U,
12704 _MM_FROUND_CUR_DIRECTION);
12705}
12706
12707extern __inline __m128d
12708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12709_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
12710{
12711 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12712 (__v2df) __B,
12713 (__v2df)
12714 _mm_setzero_pd (),
12715 (__mmask8) __U,
12716 _MM_FROUND_CUR_DIRECTION);
12717}
12718
12719extern __inline __m128
12720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12721_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12722{
12723 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12724 (__v4sf) __B,
12725 (__v4sf) __W,
12726 (__mmask8) __U,
12727 _MM_FROUND_CUR_DIRECTION);
12728}
12729
12730extern __inline __m128
12731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12732_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
12733{
12734 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12735 (__v4sf) __B,
12736 (__v4sf)
12737 _mm_setzero_ps (),
12738 (__mmask8) __U,
12739 _MM_FROUND_CUR_DIRECTION);
12740}
12741
756c5857
AI
12742extern __inline __m512d
12743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744_mm512_mul_pd (__m512d __A, __m512d __B)
12745{
2069d6fc 12746 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
12747}
12748
12749extern __inline __m512d
12750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12751_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12752{
12753 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12754 (__v8df) __B,
12755 (__v8df) __W,
12756 (__mmask8) __U,
12757 _MM_FROUND_CUR_DIRECTION);
12758}
12759
12760extern __inline __m512d
12761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12762_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
12763{
12764 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12765 (__v8df) __B,
12766 (__v8df)
12767 _mm512_setzero_pd (),
12768 (__mmask8) __U,
12769 _MM_FROUND_CUR_DIRECTION);
12770}
12771
12772extern __inline __m512
12773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12774_mm512_mul_ps (__m512 __A, __m512 __B)
12775{
2069d6fc 12776 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
12777}
12778
12779extern __inline __m512
12780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12781_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12782{
12783 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12784 (__v16sf) __B,
12785 (__v16sf) __W,
12786 (__mmask16) __U,
12787 _MM_FROUND_CUR_DIRECTION);
12788}
12789
12790extern __inline __m512
12791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12792_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
12793{
12794 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12795 (__v16sf) __B,
12796 (__v16sf)
12797 _mm512_setzero_ps (),
12798 (__mmask16) __U,
12799 _MM_FROUND_CUR_DIRECTION);
12800}
12801
f4ee3a9e
UB
12802extern __inline __m128d
12803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12804_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
12805 __m128d __B)
12806{
12807 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12808 (__v2df) __B,
12809 (__v2df) __W,
12810 (__mmask8) __U,
12811 _MM_FROUND_CUR_DIRECTION);
12812}
12813
12814extern __inline __m128d
12815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
12817{
12818 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12819 (__v2df) __B,
12820 (__v2df)
12821 _mm_setzero_pd (),
12822 (__mmask8) __U,
12823 _MM_FROUND_CUR_DIRECTION);
12824}
12825
12826extern __inline __m128
12827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12828_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
12829 __m128 __B)
12830{
12831 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12832 (__v4sf) __B,
12833 (__v4sf) __W,
12834 (__mmask8) __U,
12835 _MM_FROUND_CUR_DIRECTION);
12836}
12837
12838extern __inline __m128
12839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12840_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
12841{
12842 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12843 (__v4sf) __B,
12844 (__v4sf)
12845 _mm_setzero_ps (),
12846 (__mmask8) __U,
12847 _MM_FROUND_CUR_DIRECTION);
12848}
12849
756c5857
AI
12850extern __inline __m512d
12851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12852_mm512_div_pd (__m512d __M, __m512d __V)
12853{
2069d6fc 12854 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
12855}
12856
12857extern __inline __m512d
12858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12859_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
12860{
12861 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12862 (__v8df) __V,
12863 (__v8df) __W,
12864 (__mmask8) __U,
12865 _MM_FROUND_CUR_DIRECTION);
12866}
12867
12868extern __inline __m512d
12869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12870_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
12871{
12872 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12873 (__v8df) __V,
12874 (__v8df)
12875 _mm512_setzero_pd (),
12876 (__mmask8) __U,
12877 _MM_FROUND_CUR_DIRECTION);
12878}
12879
12880extern __inline __m512
12881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882_mm512_div_ps (__m512 __A, __m512 __B)
12883{
2069d6fc 12884 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
12885}
12886
12887extern __inline __m512
12888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12889_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12890{
12891 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12892 (__v16sf) __B,
12893 (__v16sf) __W,
12894 (__mmask16) __U,
12895 _MM_FROUND_CUR_DIRECTION);
12896}
12897
12898extern __inline __m512
12899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12900_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
12901{
12902 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12903 (__v16sf) __B,
12904 (__v16sf)
12905 _mm512_setzero_ps (),
12906 (__mmask16) __U,
12907 _MM_FROUND_CUR_DIRECTION);
12908}
12909
f4ee3a9e
UB
12910extern __inline __m128d
12911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12912_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
12913 __m128d __B)
12914{
12915 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12916 (__v2df) __B,
12917 (__v2df) __W,
12918 (__mmask8) __U,
12919 _MM_FROUND_CUR_DIRECTION);
12920}
12921
12922extern __inline __m128d
12923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12924_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
12925{
12926 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12927 (__v2df) __B,
12928 (__v2df)
12929 _mm_setzero_pd (),
12930 (__mmask8) __U,
12931 _MM_FROUND_CUR_DIRECTION);
12932}
12933
12934extern __inline __m128
12935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12936_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
12937 __m128 __B)
12938{
12939 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
12940 (__v4sf) __B,
12941 (__v4sf) __W,
12942 (__mmask8) __U,
12943 _MM_FROUND_CUR_DIRECTION);
12944}
12945
12946extern __inline __m128
12947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12948_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
12949{
12950 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
12951 (__v4sf) __B,
12952 (__v4sf)
12953 _mm_setzero_ps (),
12954 (__mmask8) __U,
12955 _MM_FROUND_CUR_DIRECTION);
12956}
12957
756c5857
AI
12958extern __inline __m512d
12959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12960_mm512_max_pd (__m512d __A, __m512d __B)
12961{
12962 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12963 (__v8df) __B,
12964 (__v8df)
0b192937 12965 _mm512_undefined_pd (),
756c5857
AI
12966 (__mmask8) -1,
12967 _MM_FROUND_CUR_DIRECTION);
12968}
12969
12970extern __inline __m512d
12971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12972_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12973{
12974 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12975 (__v8df) __B,
12976 (__v8df) __W,
12977 (__mmask8) __U,
12978 _MM_FROUND_CUR_DIRECTION);
12979}
12980
12981extern __inline __m512d
12982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12983_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
12984{
12985 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12986 (__v8df) __B,
12987 (__v8df)
12988 _mm512_setzero_pd (),
12989 (__mmask8) __U,
12990 _MM_FROUND_CUR_DIRECTION);
12991}
12992
12993extern __inline __m512
12994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12995_mm512_max_ps (__m512 __A, __m512 __B)
12996{
12997 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12998 (__v16sf) __B,
12999 (__v16sf)
0b192937 13000 _mm512_undefined_ps (),
756c5857
AI
13001 (__mmask16) -1,
13002 _MM_FROUND_CUR_DIRECTION);
13003}
13004
13005extern __inline __m512
13006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13007_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13008{
13009 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13010 (__v16sf) __B,
13011 (__v16sf) __W,
13012 (__mmask16) __U,
13013 _MM_FROUND_CUR_DIRECTION);
13014}
13015
13016extern __inline __m512
13017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13018_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
13019{
13020 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13021 (__v16sf) __B,
13022 (__v16sf)
13023 _mm512_setzero_ps (),
13024 (__mmask16) __U,
13025 _MM_FROUND_CUR_DIRECTION);
13026}
13027
dc7401c0
SP
13028extern __inline __m128d
13029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13030_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13031{
13032 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
13033 (__v2df) __B,
13034 (__v2df) __W,
13035 (__mmask8) __U,
13036 _MM_FROUND_CUR_DIRECTION);
13037}
13038
13039extern __inline __m128d
13040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13041_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
13042{
13043 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
13044 (__v2df) __B,
13045 (__v2df)
13046 _mm_setzero_pd (),
13047 (__mmask8) __U,
13048 _MM_FROUND_CUR_DIRECTION);
13049}
13050
13051extern __inline __m128
13052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13053_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13054{
13055 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
13056 (__v4sf) __B,
13057 (__v4sf) __W,
13058 (__mmask8) __U,
13059 _MM_FROUND_CUR_DIRECTION);
13060}
13061
13062extern __inline __m128
13063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13064_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
13065{
13066 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
13067 (__v4sf) __B,
13068 (__v4sf)
13069 _mm_setzero_ps (),
13070 (__mmask8) __U,
13071 _MM_FROUND_CUR_DIRECTION);
13072}
13073
756c5857
AI
13074extern __inline __m512d
13075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13076_mm512_min_pd (__m512d __A, __m512d __B)
13077{
13078 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13079 (__v8df) __B,
13080 (__v8df)
0b192937 13081 _mm512_undefined_pd (),
756c5857
AI
13082 (__mmask8) -1,
13083 _MM_FROUND_CUR_DIRECTION);
13084}
13085
13086extern __inline __m512d
13087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13088_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13089{
13090 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13091 (__v8df) __B,
13092 (__v8df) __W,
13093 (__mmask8) __U,
13094 _MM_FROUND_CUR_DIRECTION);
13095}
13096
13097extern __inline __m512d
13098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13099_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
13100{
13101 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13102 (__v8df) __B,
13103 (__v8df)
13104 _mm512_setzero_pd (),
13105 (__mmask8) __U,
13106 _MM_FROUND_CUR_DIRECTION);
13107}
13108
13109extern __inline __m512
13110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13111_mm512_min_ps (__m512 __A, __m512 __B)
13112{
13113 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13114 (__v16sf) __B,
13115 (__v16sf)
0b192937 13116 _mm512_undefined_ps (),
756c5857
AI
13117 (__mmask16) -1,
13118 _MM_FROUND_CUR_DIRECTION);
13119}
13120
13121extern __inline __m512
13122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13123_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13124{
13125 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13126 (__v16sf) __B,
13127 (__v16sf) __W,
13128 (__mmask16) __U,
13129 _MM_FROUND_CUR_DIRECTION);
13130}
13131
13132extern __inline __m512
13133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13134_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
13135{
13136 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13137 (__v16sf) __B,
13138 (__v16sf)
13139 _mm512_setzero_ps (),
13140 (__mmask16) __U,
13141 _MM_FROUND_CUR_DIRECTION);
13142}
13143
dc7401c0
SP
13144extern __inline __m128d
13145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13146_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13147{
13148 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13149 (__v2df) __B,
13150 (__v2df) __W,
13151 (__mmask8) __U,
13152 _MM_FROUND_CUR_DIRECTION);
13153}
13154
13155extern __inline __m128d
13156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13157_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
13158{
13159 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13160 (__v2df) __B,
13161 (__v2df)
13162 _mm_setzero_pd (),
13163 (__mmask8) __U,
13164 _MM_FROUND_CUR_DIRECTION);
13165}
13166
13167extern __inline __m128
13168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13169_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13170{
13171 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13172 (__v4sf) __B,
13173 (__v4sf) __W,
13174 (__mmask8) __U,
13175 _MM_FROUND_CUR_DIRECTION);
13176}
13177
13178extern __inline __m128
13179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13180_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
13181{
13182 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13183 (__v4sf) __B,
13184 (__v4sf)
13185 _mm_setzero_ps (),
13186 (__mmask8) __U,
13187 _MM_FROUND_CUR_DIRECTION);
13188}
13189
756c5857
AI
13190extern __inline __m512d
13191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13192_mm512_scalef_pd (__m512d __A, __m512d __B)
13193{
13194 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13195 (__v8df) __B,
13196 (__v8df)
0b192937 13197 _mm512_undefined_pd (),
756c5857
AI
13198 (__mmask8) -1,
13199 _MM_FROUND_CUR_DIRECTION);
13200}
13201
13202extern __inline __m512d
13203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13204_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13205{
13206 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13207 (__v8df) __B,
13208 (__v8df) __W,
13209 (__mmask8) __U,
13210 _MM_FROUND_CUR_DIRECTION);
13211}
13212
13213extern __inline __m512d
13214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13215_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
13216{
13217 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13218 (__v8df) __B,
13219 (__v8df)
13220 _mm512_setzero_pd (),
13221 (__mmask8) __U,
13222 _MM_FROUND_CUR_DIRECTION);
13223}
13224
13225extern __inline __m512
13226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13227_mm512_scalef_ps (__m512 __A, __m512 __B)
13228{
13229 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13230 (__v16sf) __B,
13231 (__v16sf)
0b192937 13232 _mm512_undefined_ps (),
756c5857
AI
13233 (__mmask16) -1,
13234 _MM_FROUND_CUR_DIRECTION);
13235}
13236
13237extern __inline __m512
13238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13239_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13240{
13241 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13242 (__v16sf) __B,
13243 (__v16sf) __W,
13244 (__mmask16) __U,
13245 _MM_FROUND_CUR_DIRECTION);
13246}
13247
13248extern __inline __m512
13249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13250_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
13251{
13252 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13253 (__v16sf) __B,
13254 (__v16sf)
13255 _mm512_setzero_ps (),
13256 (__mmask16) __U,
13257 _MM_FROUND_CUR_DIRECTION);
13258}
13259
075691af
AI
13260extern __inline __m128d
13261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13262_mm_scalef_sd (__m128d __A, __m128d __B)
13263{
158061a6
OM
13264 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
13265 (__v2df) __B,
13266 (__v2df)
13267 _mm_setzero_pd (),
13268 (__mmask8) -1,
13269 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13270}
13271
13272extern __inline __m128
13273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13274_mm_scalef_ss (__m128 __A, __m128 __B)
13275{
158061a6
OM
13276 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
13277 (__v4sf) __B,
13278 (__v4sf)
13279 _mm_setzero_ps (),
13280 (__mmask8) -1,
13281 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13282}
13283
756c5857
AI
13284extern __inline __m512d
13285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13286_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13287{
13288 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13289 (__v8df) __B,
13290 (__v8df) __C,
13291 (__mmask8) -1,
13292 _MM_FROUND_CUR_DIRECTION);
13293}
13294
13295extern __inline __m512d
13296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13297_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13298{
13299 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13300 (__v8df) __B,
13301 (__v8df) __C,
13302 (__mmask8) __U,
13303 _MM_FROUND_CUR_DIRECTION);
13304}
13305
13306extern __inline __m512d
13307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13308_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13309{
13310 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
13311 (__v8df) __B,
13312 (__v8df) __C,
13313 (__mmask8) __U,
13314 _MM_FROUND_CUR_DIRECTION);
13315}
13316
13317extern __inline __m512d
13318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13319_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13320{
13321 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
13322 (__v8df) __B,
13323 (__v8df) __C,
13324 (__mmask8) __U,
13325 _MM_FROUND_CUR_DIRECTION);
13326}
13327
13328extern __inline __m512
13329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13330_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13331{
13332 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13333 (__v16sf) __B,
13334 (__v16sf) __C,
13335 (__mmask16) -1,
13336 _MM_FROUND_CUR_DIRECTION);
13337}
13338
13339extern __inline __m512
13340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13341_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13342{
13343 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13344 (__v16sf) __B,
13345 (__v16sf) __C,
13346 (__mmask16) __U,
13347 _MM_FROUND_CUR_DIRECTION);
13348}
13349
13350extern __inline __m512
13351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13352_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13353{
13354 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
13355 (__v16sf) __B,
13356 (__v16sf) __C,
13357 (__mmask16) __U,
13358 _MM_FROUND_CUR_DIRECTION);
13359}
13360
13361extern __inline __m512
13362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13363_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13364{
13365 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
13366 (__v16sf) __B,
13367 (__v16sf) __C,
13368 (__mmask16) __U,
13369 _MM_FROUND_CUR_DIRECTION);
13370}
13371
13372extern __inline __m512d
13373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13374_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13375{
fe7f972d 13376 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13377 (__v8df) __B,
fe7f972d 13378 (__v8df) __C,
756c5857
AI
13379 (__mmask8) -1,
13380 _MM_FROUND_CUR_DIRECTION);
13381}
13382
13383extern __inline __m512d
13384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13385_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13386{
fe7f972d 13387 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13388 (__v8df) __B,
fe7f972d 13389 (__v8df) __C,
756c5857
AI
13390 (__mmask8) __U,
13391 _MM_FROUND_CUR_DIRECTION);
13392}
13393
13394extern __inline __m512d
13395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13396_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13397{
13398 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
13399 (__v8df) __B,
13400 (__v8df) __C,
13401 (__mmask8) __U,
13402 _MM_FROUND_CUR_DIRECTION);
13403}
13404
13405extern __inline __m512d
13406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13407_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13408{
fe7f972d 13409 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 13410 (__v8df) __B,
fe7f972d 13411 (__v8df) __C,
756c5857
AI
13412 (__mmask8) __U,
13413 _MM_FROUND_CUR_DIRECTION);
13414}
13415
13416extern __inline __m512
13417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13418_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13419{
fe7f972d 13420 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13421 (__v16sf) __B,
fe7f972d 13422 (__v16sf) __C,
756c5857
AI
13423 (__mmask16) -1,
13424 _MM_FROUND_CUR_DIRECTION);
13425}
13426
13427extern __inline __m512
13428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13429_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13430{
fe7f972d 13431 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13432 (__v16sf) __B,
fe7f972d 13433 (__v16sf) __C,
756c5857
AI
13434 (__mmask16) __U,
13435 _MM_FROUND_CUR_DIRECTION);
13436}
13437
13438extern __inline __m512
13439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13440_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13441{
13442 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
13443 (__v16sf) __B,
13444 (__v16sf) __C,
13445 (__mmask16) __U,
13446 _MM_FROUND_CUR_DIRECTION);
13447}
13448
13449extern __inline __m512
13450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13451_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13452{
fe7f972d 13453 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 13454 (__v16sf) __B,
fe7f972d 13455 (__v16sf) __C,
756c5857
AI
13456 (__mmask16) __U,
13457 _MM_FROUND_CUR_DIRECTION);
13458}
13459
13460extern __inline __m512d
13461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13462_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
13463{
13464 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13465 (__v8df) __B,
13466 (__v8df) __C,
13467 (__mmask8) -1,
13468 _MM_FROUND_CUR_DIRECTION);
13469}
13470
13471extern __inline __m512d
13472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13473_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13474{
13475 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13476 (__v8df) __B,
13477 (__v8df) __C,
13478 (__mmask8) __U,
13479 _MM_FROUND_CUR_DIRECTION);
13480}
13481
13482extern __inline __m512d
13483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13484_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13485{
13486 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
13487 (__v8df) __B,
13488 (__v8df) __C,
13489 (__mmask8) __U,
13490 _MM_FROUND_CUR_DIRECTION);
13491}
13492
13493extern __inline __m512d
13494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13495_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13496{
13497 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13498 (__v8df) __B,
13499 (__v8df) __C,
13500 (__mmask8) __U,
13501 _MM_FROUND_CUR_DIRECTION);
13502}
13503
13504extern __inline __m512
13505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13506_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
13507{
13508 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13509 (__v16sf) __B,
13510 (__v16sf) __C,
13511 (__mmask16) -1,
13512 _MM_FROUND_CUR_DIRECTION);
13513}
13514
13515extern __inline __m512
13516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13517_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13518{
13519 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13520 (__v16sf) __B,
13521 (__v16sf) __C,
13522 (__mmask16) __U,
13523 _MM_FROUND_CUR_DIRECTION);
13524}
13525
13526extern __inline __m512
13527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13528_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13529{
13530 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
13531 (__v16sf) __B,
13532 (__v16sf) __C,
13533 (__mmask16) __U,
13534 _MM_FROUND_CUR_DIRECTION);
13535}
13536
13537extern __inline __m512
13538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13539_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13540{
13541 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13542 (__v16sf) __B,
13543 (__v16sf) __C,
13544 (__mmask16) __U,
13545 _MM_FROUND_CUR_DIRECTION);
13546}
13547
13548extern __inline __m512d
13549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13550_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
13551{
13552 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13553 (__v8df) __B,
13554 -(__v8df) __C,
13555 (__mmask8) -1,
13556 _MM_FROUND_CUR_DIRECTION);
13557}
13558
13559extern __inline __m512d
13560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13561_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13562{
13563 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13564 (__v8df) __B,
13565 -(__v8df) __C,
13566 (__mmask8) __U,
13567 _MM_FROUND_CUR_DIRECTION);
13568}
13569
13570extern __inline __m512d
13571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13572_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13573{
13574 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
13575 (__v8df) __B,
13576 (__v8df) __C,
13577 (__mmask8) __U,
13578 _MM_FROUND_CUR_DIRECTION);
13579}
13580
13581extern __inline __m512d
13582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13583_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13584{
13585 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13586 (__v8df) __B,
13587 -(__v8df) __C,
13588 (__mmask8) __U,
13589 _MM_FROUND_CUR_DIRECTION);
13590}
13591
13592extern __inline __m512
13593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13594_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
13595{
13596 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13597 (__v16sf) __B,
13598 -(__v16sf) __C,
13599 (__mmask16) -1,
13600 _MM_FROUND_CUR_DIRECTION);
13601}
13602
13603extern __inline __m512
13604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13605_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13606{
13607 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13608 (__v16sf) __B,
13609 -(__v16sf) __C,
13610 (__mmask16) __U,
13611 _MM_FROUND_CUR_DIRECTION);
13612}
13613
13614extern __inline __m512
13615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13616_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13617{
13618 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
13619 (__v16sf) __B,
13620 (__v16sf) __C,
13621 (__mmask16) __U,
13622 _MM_FROUND_CUR_DIRECTION);
13623}
13624
13625extern __inline __m512
13626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13627_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13628{
13629 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13630 (__v16sf) __B,
13631 -(__v16sf) __C,
13632 (__mmask16) __U,
13633 _MM_FROUND_CUR_DIRECTION);
13634}
13635
13636extern __inline __m512d
13637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13638_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13639{
5ca94977
L
13640 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13641 (__v8df) __B,
13642 (__v8df) __C,
13643 (__mmask8) -1,
13644 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13645}
13646
13647extern __inline __m512d
13648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13649_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13650{
13651 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13652 (__v8df) __B,
13653 (__v8df) __C,
13654 (__mmask8) __U,
13655 _MM_FROUND_CUR_DIRECTION);
13656}
13657
13658extern __inline __m512d
13659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13660_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13661{
5ca94977
L
13662 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
13663 (__v8df) __B,
13664 (__v8df) __C,
13665 (__mmask8) __U,
13666 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13667}
13668
13669extern __inline __m512d
13670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13671_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13672{
5ca94977
L
13673 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
13674 (__v8df) __B,
13675 (__v8df) __C,
13676 (__mmask8) __U,
13677 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13678}
13679
13680extern __inline __m512
13681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13682_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13683{
5ca94977
L
13684 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13685 (__v16sf) __B,
13686 (__v16sf) __C,
13687 (__mmask16) -1,
13688 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13689}
13690
13691extern __inline __m512
13692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13693_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13694{
13695 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13696 (__v16sf) __B,
13697 (__v16sf) __C,
13698 (__mmask16) __U,
13699 _MM_FROUND_CUR_DIRECTION);
13700}
13701
13702extern __inline __m512
13703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13704_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13705{
5ca94977
L
13706 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
13707 (__v16sf) __B,
13708 (__v16sf) __C,
13709 (__mmask16) __U,
13710 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13711}
13712
13713extern __inline __m512
13714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13715_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13716{
5ca94977
L
13717 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
13718 (__v16sf) __B,
13719 (__v16sf) __C,
13720 (__mmask16) __U,
13721 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13722}
13723
13724extern __inline __m512d
13725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13726_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13727{
38ef6fb1
L
13728 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13729 (__v8df) __B,
13730 (__v8df) __C,
13731 (__mmask8) -1,
13732 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13733}
13734
13735extern __inline __m512d
13736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13737_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13738{
13739 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13740 (__v8df) __B,
13741 (__v8df) __C,
13742 (__mmask8) __U,
13743 _MM_FROUND_CUR_DIRECTION);
13744}
13745
13746extern __inline __m512d
13747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13748_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13749{
13750 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
13751 (__v8df) __B,
13752 (__v8df) __C,
13753 (__mmask8) __U,
13754 _MM_FROUND_CUR_DIRECTION);
13755}
13756
13757extern __inline __m512d
13758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13759_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13760{
38ef6fb1
L
13761 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
13762 (__v8df) __B,
13763 (__v8df) __C,
13764 (__mmask8) __U,
13765 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13766}
13767
13768extern __inline __m512
13769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13770_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13771{
38ef6fb1
L
13772 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13773 (__v16sf) __B,
13774 (__v16sf) __C,
13775 (__mmask16) -1,
13776 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13777}
13778
13779extern __inline __m512
13780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13781_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13782{
13783 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13784 (__v16sf) __B,
13785 (__v16sf) __C,
13786 (__mmask16) __U,
13787 _MM_FROUND_CUR_DIRECTION);
13788}
13789
13790extern __inline __m512
13791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13792_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13793{
13794 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
13795 (__v16sf) __B,
13796 (__v16sf) __C,
13797 (__mmask16) __U,
13798 _MM_FROUND_CUR_DIRECTION);
13799}
13800
13801extern __inline __m512
13802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13803_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13804{
38ef6fb1
L
13805 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
13806 (__v16sf) __B,
13807 (__v16sf) __C,
13808 (__mmask16) __U,
13809 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13810}
13811
13812extern __inline __m256i
13813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13814_mm512_cvttpd_epi32 (__m512d __A)
13815{
13816 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13817 (__v8si)
0b192937 13818 _mm256_undefined_si256 (),
756c5857
AI
13819 (__mmask8) -1,
13820 _MM_FROUND_CUR_DIRECTION);
13821}
13822
13823extern __inline __m256i
13824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13825_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13826{
13827 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13828 (__v8si) __W,
13829 (__mmask8) __U,
13830 _MM_FROUND_CUR_DIRECTION);
13831}
13832
13833extern __inline __m256i
13834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13835_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
13836{
13837 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13838 (__v8si)
13839 _mm256_setzero_si256 (),
13840 (__mmask8) __U,
13841 _MM_FROUND_CUR_DIRECTION);
13842}
13843
13844extern __inline __m256i
13845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13846_mm512_cvttpd_epu32 (__m512d __A)
13847{
13848 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13849 (__v8si)
0b192937 13850 _mm256_undefined_si256 (),
756c5857
AI
13851 (__mmask8) -1,
13852 _MM_FROUND_CUR_DIRECTION);
13853}
13854
13855extern __inline __m256i
13856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13857_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13858{
13859 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13860 (__v8si) __W,
13861 (__mmask8) __U,
13862 _MM_FROUND_CUR_DIRECTION);
13863}
13864
13865extern __inline __m256i
13866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13867_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
13868{
13869 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13870 (__v8si)
13871 _mm256_setzero_si256 (),
13872 (__mmask8) __U,
13873 _MM_FROUND_CUR_DIRECTION);
13874}
13875
13876extern __inline __m256i
13877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13878_mm512_cvtpd_epi32 (__m512d __A)
13879{
13880 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13881 (__v8si)
0b192937 13882 _mm256_undefined_si256 (),
756c5857
AI
13883 (__mmask8) -1,
13884 _MM_FROUND_CUR_DIRECTION);
13885}
13886
13887extern __inline __m256i
13888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13889_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13890{
13891 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13892 (__v8si) __W,
13893 (__mmask8) __U,
13894 _MM_FROUND_CUR_DIRECTION);
13895}
13896
13897extern __inline __m256i
13898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13899_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
13900{
13901 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13902 (__v8si)
13903 _mm256_setzero_si256 (),
13904 (__mmask8) __U,
13905 _MM_FROUND_CUR_DIRECTION);
13906}
13907
13908extern __inline __m256i
13909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13910_mm512_cvtpd_epu32 (__m512d __A)
13911{
13912 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13913 (__v8si)
0b192937 13914 _mm256_undefined_si256 (),
756c5857
AI
13915 (__mmask8) -1,
13916 _MM_FROUND_CUR_DIRECTION);
13917}
13918
13919extern __inline __m256i
13920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13921_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13922{
13923 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13924 (__v8si) __W,
13925 (__mmask8) __U,
13926 _MM_FROUND_CUR_DIRECTION);
13927}
13928
13929extern __inline __m256i
13930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13931_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
13932{
13933 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13934 (__v8si)
13935 _mm256_setzero_si256 (),
13936 (__mmask8) __U,
13937 _MM_FROUND_CUR_DIRECTION);
13938}
13939
13940extern __inline __m512i
13941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13942_mm512_cvttps_epi32 (__m512 __A)
13943{
13944 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13945 (__v16si)
4271e5cb 13946 _mm512_undefined_epi32 (),
756c5857
AI
13947 (__mmask16) -1,
13948 _MM_FROUND_CUR_DIRECTION);
13949}
13950
13951extern __inline __m512i
13952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13953_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13954{
13955 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13956 (__v16si) __W,
13957 (__mmask16) __U,
13958 _MM_FROUND_CUR_DIRECTION);
13959}
13960
13961extern __inline __m512i
13962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13963_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
13964{
13965 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13966 (__v16si)
13967 _mm512_setzero_si512 (),
13968 (__mmask16) __U,
13969 _MM_FROUND_CUR_DIRECTION);
13970}
13971
13972extern __inline __m512i
13973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13974_mm512_cvttps_epu32 (__m512 __A)
13975{
13976 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13977 (__v16si)
4271e5cb 13978 _mm512_undefined_epi32 (),
756c5857
AI
13979 (__mmask16) -1,
13980 _MM_FROUND_CUR_DIRECTION);
13981}
13982
13983extern __inline __m512i
13984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13985_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13986{
13987 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13988 (__v16si) __W,
13989 (__mmask16) __U,
13990 _MM_FROUND_CUR_DIRECTION);
13991}
13992
13993extern __inline __m512i
13994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13995_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
13996{
13997 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13998 (__v16si)
13999 _mm512_setzero_si512 (),
14000 (__mmask16) __U,
14001 _MM_FROUND_CUR_DIRECTION);
14002}
14003
14004extern __inline __m512i
14005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14006_mm512_cvtps_epi32 (__m512 __A)
14007{
14008 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14009 (__v16si)
4271e5cb 14010 _mm512_undefined_epi32 (),
756c5857
AI
14011 (__mmask16) -1,
14012 _MM_FROUND_CUR_DIRECTION);
14013}
14014
14015extern __inline __m512i
14016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14017_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14018{
14019 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14020 (__v16si) __W,
14021 (__mmask16) __U,
14022 _MM_FROUND_CUR_DIRECTION);
14023}
14024
14025extern __inline __m512i
14026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14027_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
14028{
14029 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14030 (__v16si)
14031 _mm512_setzero_si512 (),
14032 (__mmask16) __U,
14033 _MM_FROUND_CUR_DIRECTION);
14034}
14035
14036extern __inline __m512i
14037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038_mm512_cvtps_epu32 (__m512 __A)
14039{
14040 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14041 (__v16si)
4271e5cb 14042 _mm512_undefined_epi32 (),
756c5857
AI
14043 (__mmask16) -1,
14044 _MM_FROUND_CUR_DIRECTION);
14045}
14046
14047extern __inline __m512i
14048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14049_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14050{
14051 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14052 (__v16si) __W,
14053 (__mmask16) __U,
14054 _MM_FROUND_CUR_DIRECTION);
14055}
14056
14057extern __inline __m512i
14058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14059_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
14060{
14061 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14062 (__v16si)
14063 _mm512_setzero_si512 (),
14064 (__mmask16) __U,
14065 _MM_FROUND_CUR_DIRECTION);
14066}
14067
dcb2c527
JJ
14068extern __inline double
14069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14070_mm512_cvtsd_f64 (__m512d __A)
14071{
14072 return __A[0];
14073}
14074
14075extern __inline float
14076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14077_mm512_cvtss_f32 (__m512 __A)
14078{
14079 return __A[0];
14080}
14081
756c5857
AI
14082#ifdef __x86_64__
14083extern __inline __m128
14084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14085_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
14086{
14087 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
14088 _MM_FROUND_CUR_DIRECTION);
14089}
14090
14091extern __inline __m128d
14092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14093_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
14094{
14095 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
14096 _MM_FROUND_CUR_DIRECTION);
14097}
14098#endif
14099
14100extern __inline __m128
14101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14102_mm_cvtu32_ss (__m128 __A, unsigned __B)
14103{
14104 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
14105 _MM_FROUND_CUR_DIRECTION);
14106}
14107
14108extern __inline __m512
14109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14110_mm512_cvtepi32_ps (__m512i __A)
14111{
14112 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14113 (__v16sf)
0b192937 14114 _mm512_undefined_ps (),
756c5857
AI
14115 (__mmask16) -1,
14116 _MM_FROUND_CUR_DIRECTION);
14117}
14118
14119extern __inline __m512
14120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14121_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14122{
14123 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14124 (__v16sf) __W,
14125 (__mmask16) __U,
14126 _MM_FROUND_CUR_DIRECTION);
14127}
14128
14129extern __inline __m512
14130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14131_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
14132{
14133 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14134 (__v16sf)
14135 _mm512_setzero_ps (),
14136 (__mmask16) __U,
14137 _MM_FROUND_CUR_DIRECTION);
14138}
14139
14140extern __inline __m512
14141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14142_mm512_cvtepu32_ps (__m512i __A)
14143{
14144 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14145 (__v16sf)
0b192937 14146 _mm512_undefined_ps (),
756c5857
AI
14147 (__mmask16) -1,
14148 _MM_FROUND_CUR_DIRECTION);
14149}
14150
14151extern __inline __m512
14152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14153_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14154{
14155 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14156 (__v16sf) __W,
14157 (__mmask16) __U,
14158 _MM_FROUND_CUR_DIRECTION);
14159}
14160
14161extern __inline __m512
14162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14163_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
14164{
14165 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14166 (__v16sf)
14167 _mm512_setzero_ps (),
14168 (__mmask16) __U,
14169 _MM_FROUND_CUR_DIRECTION);
14170}
14171
14172#ifdef __OPTIMIZE__
14173extern __inline __m512d
14174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14175_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
756c5857 14176{
040d2bba
WX
14177 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
14178 (__v8df) __B,
14179 (__v8di) __C,
756c5857 14180 __imm,
040d2bba 14181 (__mmask8) -1,
756c5857
AI
14182 _MM_FROUND_CUR_DIRECTION);
14183}
14184
14185extern __inline __m512d
14186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14187_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
14188 __m512i __C, const int __imm)
756c5857
AI
14189{
14190 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
14191 (__v8df) __B,
14192 (__v8di) __C,
756c5857
AI
14193 __imm,
14194 (__mmask8) __U,
14195 _MM_FROUND_CUR_DIRECTION);
14196}
14197
14198extern __inline __m512d
14199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14200_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
14201 __m512i __C, const int __imm)
756c5857
AI
14202{
14203 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
14204 (__v8df) __B,
14205 (__v8di) __C,
756c5857
AI
14206 __imm,
14207 (__mmask8) __U,
14208 _MM_FROUND_CUR_DIRECTION);
14209}
14210
14211extern __inline __m512
14212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14213_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
756c5857 14214{
040d2bba
WX
14215 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
14216 (__v16sf) __B,
14217 (__v16si) __C,
756c5857 14218 __imm,
040d2bba 14219 (__mmask16) -1,
756c5857
AI
14220 _MM_FROUND_CUR_DIRECTION);
14221}
14222
14223extern __inline __m512
14224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14225_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
14226 __m512i __C, const int __imm)
756c5857
AI
14227{
14228 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
14229 (__v16sf) __B,
14230 (__v16si) __C,
756c5857
AI
14231 __imm,
14232 (__mmask16) __U,
14233 _MM_FROUND_CUR_DIRECTION);
14234}
14235
14236extern __inline __m512
14237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14238_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
14239 __m512i __C, const int __imm)
756c5857
AI
14240{
14241 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
14242 (__v16sf) __B,
14243 (__v16si) __C,
756c5857
AI
14244 __imm,
14245 (__mmask16) __U,
14246 _MM_FROUND_CUR_DIRECTION);
14247}
14248
14249extern __inline __m128d
14250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14251_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
756c5857 14252{
040d2bba
WX
14253 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
14254 (__v2df) __B,
14255 (__v2di) __C, __imm,
14256 (__mmask8) -1,
756c5857
AI
14257 _MM_FROUND_CUR_DIRECTION);
14258}
14259
14260extern __inline __m128d
14261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14262_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
14263 __m128i __C, const int __imm)
756c5857
AI
14264{
14265 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
14266 (__v2df) __B,
14267 (__v2di) __C, __imm,
756c5857
AI
14268 (__mmask8) __U,
14269 _MM_FROUND_CUR_DIRECTION);
14270}
14271
14272extern __inline __m128d
14273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14274_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
14275 __m128i __C, const int __imm)
756c5857
AI
14276{
14277 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
14278 (__v2df) __B,
14279 (__v2di) __C,
756c5857
AI
14280 __imm,
14281 (__mmask8) __U,
14282 _MM_FROUND_CUR_DIRECTION);
14283}
14284
14285extern __inline __m128
14286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14287_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
756c5857 14288{
040d2bba
WX
14289 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
14290 (__v4sf) __B,
14291 (__v4si) __C, __imm,
14292 (__mmask8) -1,
756c5857
AI
14293 _MM_FROUND_CUR_DIRECTION);
14294}
14295
14296extern __inline __m128
14297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14298_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
14299 __m128i __C, const int __imm)
756c5857
AI
14300{
14301 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
14302 (__v4sf) __B,
14303 (__v4si) __C, __imm,
756c5857
AI
14304 (__mmask8) __U,
14305 _MM_FROUND_CUR_DIRECTION);
14306}
14307
14308extern __inline __m128
14309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14310_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
14311 __m128i __C, const int __imm)
756c5857
AI
14312{
14313 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
14314 (__v4sf) __B,
14315 (__v4si) __C, __imm,
756c5857
AI
14316 (__mmask8) __U,
14317 _MM_FROUND_CUR_DIRECTION);
14318}
14319#else
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: the immediate C
   reaches the builtin as written.  All-ones mask.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
    (__v8df) (__m512d) (X), \
    (__v8df) (__m512d) (Y), \
    (__v8di) (__m512i) (Z), \
    (int) (C), \
    (__mmask8) (-1), \
    _MM_FROUND_CUR_DIRECTION))
756c5857 14324
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   variant, U is the element mask.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
    (__v8df) (__m512d) (X), \
    (__v8df) (__m512d) (Y), \
    (__v8di) (__m512i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14329
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   variant built on the _maskz builtin, U is the element mask.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
    (__v8df) (__m512d) (X), \
    (__v8df) (__m512d) (Y), \
    (__v8di) (__m512i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14334
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: the immediate C
   reaches the builtin as written.  All-ones mask.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
    (__v16sf) (__m512) (X), \
    (__v16sf) (__m512) (Y), \
    (__v16si) (__m512i) (Z), \
    (int) (C), \
    (__mmask16) (-1), \
    _MM_FROUND_CUR_DIRECTION))
756c5857 14339
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   variant, U is the element mask.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
    (__v16sf) (__m512) (X), \
    (__v16sf) (__m512) (Y), \
    (__v16si) (__m512i) (Z), \
    (int) (C), \
    (__mmask16) (U), \
    _MM_FROUND_CUR_DIRECTION))
14344
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   variant built on the _maskz builtin, U is the element mask.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
    (__v16sf) (__m512) (X), \
    (__v16sf) (__m512) (Y), \
    (__v16si) (__m512i) (Z), \
    (int) (C), \
    (__mmask16) (U), \
    _MM_FROUND_CUR_DIRECTION))
14349
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: the immediate C
   reaches the builtin as written.  All-ones mask.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (__v2di) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1), \
    _MM_FROUND_CUR_DIRECTION))
756c5857 14354
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   variant, U is the mask.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (__v2di) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14359
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   variant built on the _maskz builtin, U is the mask.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
    (__v2df) (__m128d) (X), \
    (__v2df) (__m128d) (Y), \
    (__v2di) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14364
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: the immediate C
   reaches the builtin as written.  All-ones mask.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (__v4si) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1), \
    _MM_FROUND_CUR_DIRECTION))
756c5857 14369
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   variant, U is the mask.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (__v4si) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14374
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   variant built on the _maskz builtin, U is the mask.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
    (__v4sf) (__m128) (X), \
    (__v4sf) (__m128) (Y), \
    (__v4si) (__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
14379#endif
14380
14381#ifdef __x86_64__
14382extern __inline unsigned long long
14383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14384_mm_cvtss_u64 (__m128 __A)
14385{
14386 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
14387 __A,
14388 _MM_FROUND_CUR_DIRECTION);
14389}
14390
14391extern __inline unsigned long long
14392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14393_mm_cvttss_u64 (__m128 __A)
14394{
14395 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
14396 __A,
14397 _MM_FROUND_CUR_DIRECTION);
14398}
14399
14400extern __inline long long
14401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14402_mm_cvttss_i64 (__m128 __A)
14403{
14404 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
14405 _MM_FROUND_CUR_DIRECTION);
14406}
14407#endif /* __x86_64__ */
14408
93103603
SP
14409extern __inline int
14410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14411_mm512_cvtsi512_si32 (__m512i __A)
14412{
14413 __v16si __B = (__v16si) __A;
14414 return __B[0];
14415}
14416
756c5857
AI
14417extern __inline unsigned
14418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14419_mm_cvtss_u32 (__m128 __A)
14420{
14421 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
14422 _MM_FROUND_CUR_DIRECTION);
14423}
14424
14425extern __inline unsigned
14426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14427_mm_cvttss_u32 (__m128 __A)
14428{
14429 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
14430 _MM_FROUND_CUR_DIRECTION);
14431}
14432
14433extern __inline int
14434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14435_mm_cvttss_i32 (__m128 __A)
14436{
14437 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
14438 _MM_FROUND_CUR_DIRECTION);
14439}
14440
93103603
SP
14441extern __inline int
14442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14443_mm_cvtsd_i32 (__m128d __A)
14444{
14445 return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
14446}
14447
14448extern __inline int
14449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14450_mm_cvtss_i32 (__m128 __A)
14451{
14452 return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
14453}
14454
14455extern __inline __m128d
14456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14457_mm_cvti32_sd (__m128d __A, int __B)
14458{
14459 return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
14460}
14461
14462extern __inline __m128
14463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14464_mm_cvti32_ss (__m128 __A, int __B)
14465{
14466 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
14467}
14468
756c5857
AI
14469#ifdef __x86_64__
14470extern __inline unsigned long long
14471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14472_mm_cvtsd_u64 (__m128d __A)
14473{
14474 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
14475 __A,
14476 _MM_FROUND_CUR_DIRECTION);
14477}
14478
14479extern __inline unsigned long long
14480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14481_mm_cvttsd_u64 (__m128d __A)
14482{
14483 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
14484 __A,
14485 _MM_FROUND_CUR_DIRECTION);
14486}
14487
14488extern __inline long long
14489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14490_mm_cvttsd_i64 (__m128d __A)
14491{
14492 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
14493 _MM_FROUND_CUR_DIRECTION);
14494}
93103603
SP
14495
14496extern __inline long long
14497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14498_mm_cvtsd_i64 (__m128d __A)
14499{
14500 return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
14501}
14502
14503extern __inline long long
14504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14505_mm_cvtss_i64 (__m128 __A)
14506{
14507 return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
14508}
14509
14510extern __inline __m128d
14511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14512_mm_cvti64_sd (__m128d __A, long long __B)
14513{
14514 return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
14515}
14516
14517extern __inline __m128
14518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14519_mm_cvti64_ss (__m128 __A, long long __B)
14520{
14521 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
14522}
756c5857
AI
14523#endif /* __x86_64__ */
14524
14525extern __inline unsigned
14526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14527_mm_cvtsd_u32 (__m128d __A)
14528{
14529 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
14530 _MM_FROUND_CUR_DIRECTION);
14531}
14532
14533extern __inline unsigned
14534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14535_mm_cvttsd_u32 (__m128d __A)
14536{
14537 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
14538 _MM_FROUND_CUR_DIRECTION);
14539}
14540
14541extern __inline int
14542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14543_mm_cvttsd_i32 (__m128d __A)
14544{
14545 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
14546 _MM_FROUND_CUR_DIRECTION);
14547}
14548
14549extern __inline __m512d
14550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14551_mm512_cvtps_pd (__m256 __A)
14552{
14553 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14554 (__v8df)
0b192937 14555 _mm512_undefined_pd (),
756c5857
AI
14556 (__mmask8) -1,
14557 _MM_FROUND_CUR_DIRECTION);
14558}
14559
14560extern __inline __m512d
14561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14562_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
14563{
14564 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14565 (__v8df) __W,
14566 (__mmask8) __U,
14567 _MM_FROUND_CUR_DIRECTION);
14568}
14569
14570extern __inline __m512d
14571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14572_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
14573{
14574 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14575 (__v8df)
14576 _mm512_setzero_pd (),
14577 (__mmask8) __U,
14578 _MM_FROUND_CUR_DIRECTION);
14579}
14580
14581extern __inline __m512
14582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14583_mm512_cvtph_ps (__m256i __A)
14584{
14585 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14586 (__v16sf)
0b192937 14587 _mm512_undefined_ps (),
756c5857
AI
14588 (__mmask16) -1,
14589 _MM_FROUND_CUR_DIRECTION);
14590}
14591
14592extern __inline __m512
14593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14594_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
14595{
14596 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14597 (__v16sf) __W,
14598 (__mmask16) __U,
14599 _MM_FROUND_CUR_DIRECTION);
14600}
14601
14602extern __inline __m512
14603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14604_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
14605{
14606 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14607 (__v16sf)
14608 _mm512_setzero_ps (),
14609 (__mmask16) __U,
14610 _MM_FROUND_CUR_DIRECTION);
14611}
14612
14613extern __inline __m256
14614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14615_mm512_cvtpd_ps (__m512d __A)
14616{
14617 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14618 (__v8sf)
0b192937 14619 _mm256_undefined_ps (),
756c5857
AI
14620 (__mmask8) -1,
14621 _MM_FROUND_CUR_DIRECTION);
14622}
14623
14624extern __inline __m256
14625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14626_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
14627{
14628 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14629 (__v8sf) __W,
14630 (__mmask8) __U,
14631 _MM_FROUND_CUR_DIRECTION);
14632}
14633
14634extern __inline __m256
14635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14636_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
14637{
14638 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14639 (__v8sf)
14640 _mm256_setzero_ps (),
14641 (__mmask8) __U,
14642 _MM_FROUND_CUR_DIRECTION);
14643}
14644
14645#ifdef __OPTIMIZE__
14646extern __inline __m512
14647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14648_mm512_getexp_ps (__m512 __A)
14649{
14650 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14651 (__v16sf)
0b192937 14652 _mm512_undefined_ps (),
756c5857
AI
14653 (__mmask16) -1,
14654 _MM_FROUND_CUR_DIRECTION);
14655}
14656
14657extern __inline __m512
14658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14659_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
14660{
14661 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14662 (__v16sf) __W,
14663 (__mmask16) __U,
14664 _MM_FROUND_CUR_DIRECTION);
14665}
14666
14667extern __inline __m512
14668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14669_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
14670{
14671 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14672 (__v16sf)
14673 _mm512_setzero_ps (),
14674 (__mmask16) __U,
14675 _MM_FROUND_CUR_DIRECTION);
14676}
14677
14678extern __inline __m512d
14679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14680_mm512_getexp_pd (__m512d __A)
14681{
14682 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14683 (__v8df)
0b192937 14684 _mm512_undefined_pd (),
756c5857
AI
14685 (__mmask8) -1,
14686 _MM_FROUND_CUR_DIRECTION);
14687}
14688
14689extern __inline __m512d
14690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14691_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
14692{
14693 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14694 (__v8df) __W,
14695 (__mmask8) __U,
14696 _MM_FROUND_CUR_DIRECTION);
14697}
14698
14699extern __inline __m512d
14700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14701_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
14702{
14703 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14704 (__v8df)
14705 _mm512_setzero_pd (),
14706 (__mmask8) __U,
14707 _MM_FROUND_CUR_DIRECTION);
14708}
14709
075691af
AI
14710extern __inline __m128
14711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14712_mm_getexp_ss (__m128 __A, __m128 __B)
14713{
14714 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
14715 (__v4sf) __B,
14716 _MM_FROUND_CUR_DIRECTION);
14717}
14718
68d872d7
SP
14719extern __inline __m128
14720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14721_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
14722{
14723 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14724 (__v4sf) __B,
14725 (__v4sf) __W,
14726 (__mmask8) __U,
14727 _MM_FROUND_CUR_DIRECTION);
14728}
14729
14730extern __inline __m128
14731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14732_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
14733{
14734 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14735 (__v4sf) __B,
14736 (__v4sf)
14737 _mm_setzero_ps (),
14738 (__mmask8) __U,
14739 _MM_FROUND_CUR_DIRECTION);
14740}
14741
075691af
AI
14742extern __inline __m128d
14743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14744_mm_getexp_sd (__m128d __A, __m128d __B)
14745{
14746 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
14747 (__v2df) __B,
14748 _MM_FROUND_CUR_DIRECTION);
14749}
14750
68d872d7
SP
14751extern __inline __m128d
14752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14753_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
14754{
14755 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14756 (__v2df) __B,
14757 (__v2df) __W,
14758 (__mmask8) __U,
14759 _MM_FROUND_CUR_DIRECTION);
14760}
14761
14762extern __inline __m128d
14763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14764_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
14765{
14766 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14767 (__v2df) __B,
14768 (__v2df)
14769 _mm_setzero_pd (),
14770 (__mmask8) __U,
14771 _MM_FROUND_CUR_DIRECTION);
14772}
14773
756c5857
AI
14774extern __inline __m512d
14775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14776_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
14777 _MM_MANTISSA_SIGN_ENUM __C)
14778{
14779 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14780 (__C << 2) | __B,
0b192937 14781 _mm512_undefined_pd (),
756c5857
AI
14782 (__mmask8) -1,
14783 _MM_FROUND_CUR_DIRECTION);
14784}
14785
14786extern __inline __m512d
14787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14788_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
14789 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14790{
14791 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14792 (__C << 2) | __B,
14793 (__v8df) __W, __U,
14794 _MM_FROUND_CUR_DIRECTION);
14795}
14796
14797extern __inline __m512d
14798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14799_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
14800 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14801{
14802 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14803 (__C << 2) | __B,
14804 (__v8df)
14805 _mm512_setzero_pd (),
14806 __U,
14807 _MM_FROUND_CUR_DIRECTION);
14808}
14809
14810extern __inline __m512
14811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14812_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
14813 _MM_MANTISSA_SIGN_ENUM __C)
14814{
14815 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14816 (__C << 2) | __B,
0b192937 14817 _mm512_undefined_ps (),
756c5857
AI
14818 (__mmask16) -1,
14819 _MM_FROUND_CUR_DIRECTION);
14820}
14821
14822extern __inline __m512
14823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14824_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
14825 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14826{
14827 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14828 (__C << 2) | __B,
14829 (__v16sf) __W, __U,
14830 _MM_FROUND_CUR_DIRECTION);
14831}
14832
14833extern __inline __m512
14834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14835_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
14836 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14837{
14838 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14839 (__C << 2) | __B,
14840 (__v16sf)
14841 _mm512_setzero_ps (),
14842 __U,
14843 _MM_FROUND_CUR_DIRECTION);
14844}
14845
075691af
AI
14846extern __inline __m128d
14847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14848_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
14849 _MM_MANTISSA_SIGN_ENUM __D)
14850{
14851 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
14852 (__v2df) __B,
14853 (__D << 2) | __C,
14854 _MM_FROUND_CUR_DIRECTION);
14855}
14856
68d872d7
SP
14857extern __inline __m128d
14858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14859_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
14860 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14861{
14862 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14863 (__v2df) __B,
14864 (__D << 2) | __C,
14865 (__v2df) __W,
14866 __U,
14867 _MM_FROUND_CUR_DIRECTION);
14868}
14869
14870extern __inline __m128d
14871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14872_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
14873 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14874{
14875 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14876 (__v2df) __B,
14877 (__D << 2) | __C,
14878 (__v2df)
14879 _mm_setzero_pd(),
14880 __U,
14881 _MM_FROUND_CUR_DIRECTION);
14882}
14883
075691af
AI
14884extern __inline __m128
14885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14886_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
14887 _MM_MANTISSA_SIGN_ENUM __D)
14888{
14889 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
14890 (__v4sf) __B,
14891 (__D << 2) | __C,
14892 _MM_FROUND_CUR_DIRECTION);
14893}
14894
68d872d7
SP
14895extern __inline __m128
14896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14897_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
14898 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14899{
14900 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14901 (__v4sf) __B,
14902 (__D << 2) | __C,
14903 (__v4sf) __W,
14904 __U,
14905 _MM_FROUND_CUR_DIRECTION);
14906}
14907
14908extern __inline __m128
14909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14910_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
14911 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14912{
14913 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14914 (__v4sf) __B,
14915 (__D << 2) | __C,
14916 (__v4sf)
14917 _mm_setzero_ps(),
14918 __U,
14919 _MM_FROUND_CUR_DIRECTION);
14920}
14921
756c5857
AI
14922#else
/* Macro form: expands to __builtin_ia32_getmantpd512_mask with the
   immediate ((C) << 2) | (B), _mm512_undefined_pd () and a -1 mask.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
14929
/* Macro form: expands to __builtin_ia32_getmantpd512_mask with the
   immediate ((C) << 2) | (B), W as third operand and U as mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14936
/* Macro form: expands to __builtin_ia32_getmantpd512_mask with the
   immediate ((C) << 2) | (B), _mm512_setzero_pd () and U as mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form: expands to __builtin_ia32_getmantps512_mask with the
   immediate ((C) << 2) | (B), _mm512_undefined_ps () and a -1 mask.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
14949
/* Macro form: expands to __builtin_ia32_getmantps512_mask with the
   immediate ((C) << 2) | (B), W as third operand and U as mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
14956
/* Macro form: expands to __builtin_ia32_getmantps512_mask with the
   immediate ((C) << 2) | (B), _mm512_setzero_ps () and U as mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro form: expands to __builtin_ia32_getmantsd_round on X and Y with
   the immediate ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), _MM_FROUND_CUR_DIRECTION))
14968
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getmantsd_mask_round on X and Y
   with the immediate ((D) << 2) | (C), W as fourth operand and U as mask.  */
#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v2df) (__m128d) (W), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14976
/* Macro form: expands to __builtin_ia32_getmantsd_mask_round on X and Y
   with the immediate ((D) << 2) | (C), _mm_setzero_pd () and U as mask.  */
#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14984
075691af
AI
/* Macro form: expands to __builtin_ia32_getmantss_round on X and Y with
   the immediate ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), _MM_FROUND_CUR_DIRECTION))
14990
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getmantss_mask_round on X and Y
   with the immediate ((D) << 2) | (C), W as fourth operand and U as mask.  */
#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v4sf) (__m128) (W), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14998
/* Macro form: expands to __builtin_ia32_getmantss_mask_round on X and Y
   with the immediate ((D) << 2) | (C), _mm_setzero_ps () and U as mask.  */
#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
15006
/* Macro form: expands to __builtin_ia32_getexpss128_round on A and B
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     _MM_FROUND_CUR_DIRECTION))
15010
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getexpss_mask_round on A and B
   with W as third operand and U as mask.  Every operand is parenthesized
   and cast, and the whole expansion is wrapped in parentheses (as in
   _mm_getexp_ss), so the macro behaves as a single primary expression
   when followed by a postfix operator or embedded in a larger one.  */
#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) (__m128) (W), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
15014
/* Macro form: expands to __builtin_ia32_getexpss_mask_round on A and B
   with _mm_setzero_ps () as third operand and U as mask.  Every operand
   is parenthesized and cast, and the whole expansion is wrapped in
   parentheses (as in _mm_getexp_ss), so the macro behaves as a single
   primary expression wherever it is used.  */
#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) _mm_setzero_ps (), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
15018
/* Macro form: expands to __builtin_ia32_getexpsd128_round on A and B
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     _MM_FROUND_CUR_DIRECTION))
15022
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getexpsd_mask_round on A and B
   with W as third operand and U as mask.  Every operand is parenthesized
   and cast, and the whole expansion is wrapped in parentheses (as in
   _mm_getexp_sd), so the macro behaves as a single primary expression
   when followed by a postfix operator or embedded in a larger one.  */
#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) (__m128d) (W), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
15026
/* Macro form: expands to __builtin_ia32_getexpsd_mask_round on A and B
   with _mm_setzero_pd () as third operand and U as mask.  Every operand
   is parenthesized and cast, and the whole expansion is wrapped in
   parentheses (as in _mm_getexp_sd), so the macro behaves as a single
   primary expression wherever it is used.  */
#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) _mm_setzero_pd (), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
15030
756c5857
AI
/* Macro form: expands to __builtin_ia32_getexpps512_mask on A with
   _mm512_undefined_ps () and a -1 mask.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
15034
/* Macro form: expands to __builtin_ia32_getexpps512_mask on A with W as
   second operand and U as mask.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
15038
/* Macro form: expands to __builtin_ia32_getexpps512_mask on A with
   _mm512_setzero_ps () and U as mask.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
15042
/* Macro form: expands to __builtin_ia32_getexppd512_mask on A with
   _mm512_undefined_pd () and a -1 mask.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
15046
/* Macro form: expands to __builtin_ia32_getexppd512_mask on A with W as
   second operand and U as mask.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
15050
/* Macro form: expands to __builtin_ia32_getexppd512_mask on A with
   _mm512_setzero_pd () and U as mask.  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
15054#endif
15055
15056#ifdef __OPTIMIZE__
15057extern __inline __m512
15058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15059_mm512_roundscale_ps (__m512 __A, const int __imm)
15060{
15061 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
15062 (__v16sf)
15063 _mm512_undefined_ps (),
15064 -1,
756c5857
AI
15065 _MM_FROUND_CUR_DIRECTION);
15066}
15067
15068extern __inline __m512
15069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15070_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
15071 const int __imm)
15072{
15073 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
15074 (__v16sf) __A,
15075 (__mmask16) __B,
15076 _MM_FROUND_CUR_DIRECTION);
15077}
15078
15079extern __inline __m512
15080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15081_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
15082{
15083 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
15084 __imm,
15085 (__v16sf)
15086 _mm512_setzero_ps (),
15087 (__mmask16) __A,
15088 _MM_FROUND_CUR_DIRECTION);
15089}
15090
15091extern __inline __m512d
15092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15093_mm512_roundscale_pd (__m512d __A, const int __imm)
15094{
15095 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
15096 (__v8df)
15097 _mm512_undefined_pd (),
15098 -1,
756c5857
AI
15099 _MM_FROUND_CUR_DIRECTION);
15100}
15101
15102extern __inline __m512d
15103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15104_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
15105 const int __imm)
15106{
15107 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
15108 (__v8df) __A,
15109 (__mmask8) __B,
15110 _MM_FROUND_CUR_DIRECTION);
15111}
15112
15113extern __inline __m512d
15114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15115_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
15116{
15117 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
15118 __imm,
15119 (__v8df)
15120 _mm512_setzero_pd (),
15121 (__mmask8) __A,
15122 _MM_FROUND_CUR_DIRECTION);
15123}
15124
075691af
AI
15125extern __inline __m128
15126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15127_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
15128{
a7c4d6d1
HL
15129 return (__m128)
15130 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
15131 (__v4sf) __B, __imm,
15132 (__v4sf)
15133 _mm_setzero_ps (),
15134 (__mmask8) -1,
15135 _MM_FROUND_CUR_DIRECTION);
15136}
15137
15138
15139extern __inline __m128
15140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15141_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
15142 const int __imm)
15143{
15144 return (__m128)
15145 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
15146 (__v4sf) __D, __imm,
15147 (__v4sf) __A,
15148 (__mmask8) __B,
15149 _MM_FROUND_CUR_DIRECTION);
15150}
15151
15152extern __inline __m128
15153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15154_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
15155 const int __imm)
15156{
15157 return (__m128)
15158 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
15159 (__v4sf) __C, __imm,
15160 (__v4sf)
15161 _mm_setzero_ps (),
15162 (__mmask8) __A,
15163 _MM_FROUND_CUR_DIRECTION);
075691af
AI
15164}
15165
15166extern __inline __m128d
15167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15168_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
15169{
a7c4d6d1
HL
15170 return (__m128d)
15171 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
15172 (__v2df) __B, __imm,
15173 (__v2df)
15174 _mm_setzero_pd (),
15175 (__mmask8) -1,
15176 _MM_FROUND_CUR_DIRECTION);
15177}
15178
15179extern __inline __m128d
15180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15181_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
15182 const int __imm)
15183{
15184 return (__m128d)
15185 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
15186 (__v2df) __D, __imm,
15187 (__v2df) __A,
15188 (__mmask8) __B,
15189 _MM_FROUND_CUR_DIRECTION);
15190}
15191
15192extern __inline __m128d
15193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15194_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
15195 const int __imm)
15196{
15197 return (__m128d)
15198 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
15199 (__v2df) __C, __imm,
15200 (__v2df)
15201 _mm_setzero_pd (),
15202 (__mmask8) __A,
15203 _MM_FROUND_CUR_DIRECTION);
075691af
AI
15204}
15205
756c5857
AI
15206#else
/* Macro used when __OPTIMIZE__ is not defined: A is the source vector,
   B the immediate.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (A), \
     (int) (B), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro used when __OPTIMIZE__ is not defined: C is the source vector,
   D the immediate, A the third builtin operand and B the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (C), (int) (D), \
     (__v16sf) (__m512) (A), (__mmask16) (B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: B is the source vector,
   C the immediate and A the mask; the third builtin operand is
   _mm512_setzero_ps ().  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (B), (int) (C), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) (A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: A is the source vector,
   B the immediate.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (A), \
     (int) (B), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro used when __OPTIMIZE__ is not defined: C is the source vector,
   D the immediate, A the third builtin operand and B the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (C), (int) (D), \
     (__v8df) (__m512d) (A), (__mmask8) (B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: B is the source vector,
   C the immediate and A the mask; the third builtin operand is
   _mm512_setzero_pd ().  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (B), (int) (C), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) (A), \
     _MM_FROUND_CUR_DIRECTION))
a7c4d6d1
HL
/* Macro used when __OPTIMIZE__ is not defined: A and B are the vector
   operands, I the immediate; uses _mm_setzero_ps () and a -1 mask.  */
#define _mm_roundscale_ss(A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: B and C are the vector
   operands, I the immediate, A the fourth builtin operand, U the mask.  */
#define _mm_mask_roundscale_ss(A, U, B, C, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (B), (__v4sf) (__m128) (C), (int) (I), \
     (__v4sf) (__m128) (A), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: A and B are the vector
   operands, I the immediate, U the mask; uses _mm_setzero_ps ().  */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: A and B are the vector
   operands, I the immediate; uses _mm_setzero_pd () and a -1 mask.  */
#define _mm_roundscale_sd(A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: B and C are the vector
   operands, I the immediate, A the fourth builtin operand, U the mask.  */
#define _mm_mask_roundscale_sd(A, U, B, C, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (B), (__v2df) (__m128d) (C), (int) (I), \
     (__v2df) (__m128d) (A), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro used when __OPTIMIZE__ is not defined: A and B are the vector
   operands, I the immediate, U the mask; uses _mm_setzero_pd ().  */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
15281#endif
15282
15283#ifdef __OPTIMIZE__
15284extern __inline __mmask8
15285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15286_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
15287{
15288 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15289 (__v8df) __Y, __P,
15290 (__mmask8) -1,
15291 _MM_FROUND_CUR_DIRECTION);
15292}
15293
15294extern __inline __mmask16
15295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15296_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
15297{
15298 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15299 (__v16sf) __Y, __P,
15300 (__mmask16) -1,
15301 _MM_FROUND_CUR_DIRECTION);
15302}
15303
15304extern __inline __mmask16
15305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15306_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
15307{
15308 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15309 (__v16sf) __Y, __P,
15310 (__mmask16) __U,
15311 _MM_FROUND_CUR_DIRECTION);
15312}
15313
15314extern __inline __mmask8
15315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15316_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
15317{
15318 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15319 (__v8df) __Y, __P,
15320 (__mmask8) __U,
15321 _MM_FROUND_CUR_DIRECTION);
15322}
15323
12d69dbf
JJ
15324extern __inline __mmask8
15325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15326_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
15327{
15328 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15329 (__v2df) __Y, __P,
15330 (__mmask8) -1,
15331 _MM_FROUND_CUR_DIRECTION);
15332}
15333
15334extern __inline __mmask8
15335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15336_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
15337{
15338 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15339 (__v2df) __Y, __P,
15340 (__mmask8) __M,
15341 _MM_FROUND_CUR_DIRECTION);
15342}
15343
15344extern __inline __mmask8
15345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15346_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
15347{
15348 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15349 (__v4sf) __Y, __P,
15350 (__mmask8) -1,
15351 _MM_FROUND_CUR_DIRECTION);
15352}
15353
15354extern __inline __mmask8
15355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15356_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
15357{
15358 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15359 (__v4sf) __Y, __P,
15360 (__mmask8) __M,
15361 _MM_FROUND_CUR_DIRECTION);
15362}
15363
15364#else
/* Macro form: __builtin_ia32_cmppd512_mask on X and Y with predicate P
   and a -1 mask.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
15369
/* Macro form: __builtin_ia32_cmpps512_mask on X and Y with predicate P
   and a -1 mask.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
15374
/* Macro form: __builtin_ia32_cmppd512_mask on X and Y with predicate P
   and M as the mask operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) (M), \
     _MM_FROUND_CUR_DIRECTION))
15379
/* Macro form: __builtin_ia32_cmpps512_mask on X and Y with predicate P
   and M as the mask operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) (M), \
     _MM_FROUND_CUR_DIRECTION))
15384
/* Macro form: __builtin_ia32_cmpsd_mask on X and Y with predicate P and
   a -1 mask.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
15389
/* Macro form: __builtin_ia32_cmpsd_mask on X and Y with predicate P and
   M as the mask operand.  M is parenthesized and converted to __mmask8
   like the other operands, matching the inline definition and the
   _mm512_mask_cmp_*_mask macros.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15394
/* Macro form: __builtin_ia32_cmpss_mask on X and Y with predicate P and
   a -1 mask.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
15399
/* Macro form: __builtin_ia32_cmpss_mask on X and Y with predicate P and
   M as the mask operand.  M is parenthesized and converted to __mmask8
   like the other operands, matching the inline definition and the
   _mm512_mask_cmp_*_mask macros.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15404#endif
15405
7e23f4a6
OM
15406extern __inline __mmask8
15407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15408_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
15409{
15410 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15411 (__v8df) __Y, _CMP_EQ_OQ,
15412 (__mmask8) -1,
15413 _MM_FROUND_CUR_DIRECTION);
15414}
15415
15416extern __inline __mmask8
15417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15418_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15419{
15420 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15421 (__v8df) __Y, _CMP_EQ_OQ,
15422 (__mmask8) __U,
15423 _MM_FROUND_CUR_DIRECTION);
15424}
15425
15426extern __inline __mmask8
15427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15428_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
15429{
15430 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15431 (__v8df) __Y, _CMP_LT_OS,
15432 (__mmask8) -1,
15433 _MM_FROUND_CUR_DIRECTION);
15434}
15435
15436extern __inline __mmask8
15437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15438_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15439{
15440 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15441 (__v8df) __Y, _CMP_LT_OS,
15442 (__mmask8) __U,
15443 _MM_FROUND_CUR_DIRECTION);
15444}
15445
15446extern __inline __mmask8
15447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15448_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
15449{
15450 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15451 (__v8df) __Y, _CMP_LE_OS,
15452 (__mmask8) -1,
15453 _MM_FROUND_CUR_DIRECTION);
15454}
15455
15456extern __inline __mmask8
15457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15458_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15459{
15460 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15461 (__v8df) __Y, _CMP_LE_OS,
15462 (__mmask8) __U,
15463 _MM_FROUND_CUR_DIRECTION);
15464}
15465
15466extern __inline __mmask8
15467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15468_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
15469{
15470 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15471 (__v8df) __Y, _CMP_UNORD_Q,
15472 (__mmask8) -1,
15473 _MM_FROUND_CUR_DIRECTION);
15474}
15475
15476extern __inline __mmask8
15477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15478_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15479{
15480 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15481 (__v8df) __Y, _CMP_UNORD_Q,
15482 (__mmask8) __U,
15483 _MM_FROUND_CUR_DIRECTION);
15484}
15485
15486extern __inline __mmask8
15487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15488_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
15489{
15490 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15491 (__v8df) __Y, _CMP_NEQ_UQ,
15492 (__mmask8) -1,
15493 _MM_FROUND_CUR_DIRECTION);
15494}
15495
15496extern __inline __mmask8
15497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15498_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15499{
15500 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15501 (__v8df) __Y, _CMP_NEQ_UQ,
15502 (__mmask8) __U,
15503 _MM_FROUND_CUR_DIRECTION);
15504}
15505
15506extern __inline __mmask8
15507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15508_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
15509{
15510 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15511 (__v8df) __Y, _CMP_NLT_US,
15512 (__mmask8) -1,
15513 _MM_FROUND_CUR_DIRECTION);
15514}
15515
15516extern __inline __mmask8
15517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15518_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15519{
15520 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15521 (__v8df) __Y, _CMP_NLT_US,
15522 (__mmask8) __U,
15523 _MM_FROUND_CUR_DIRECTION);
15524}
15525
15526extern __inline __mmask8
15527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15528_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15529{
15530 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15531 (__v8df) __Y, _CMP_NLE_US,
15532 (__mmask8) -1,
15533 _MM_FROUND_CUR_DIRECTION);
15534}
15535
15536extern __inline __mmask8
15537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15538_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15539{
15540 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15541 (__v8df) __Y, _CMP_NLE_US,
15542 (__mmask8) __U,
15543 _MM_FROUND_CUR_DIRECTION);
15544}
15545
15546extern __inline __mmask8
15547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15548_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15549{
15550 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15551 (__v8df) __Y, _CMP_ORD_Q,
15552 (__mmask8) -1,
15553 _MM_FROUND_CUR_DIRECTION);
15554}
15555
15556extern __inline __mmask8
15557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15558_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15559{
15560 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15561 (__v8df) __Y, _CMP_ORD_Q,
15562 (__mmask8) __U,
15563 _MM_FROUND_CUR_DIRECTION);
15564}
15565
15566extern __inline __mmask16
15567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15568_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15569{
15570 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15571 (__v16sf) __Y, _CMP_EQ_OQ,
15572 (__mmask16) -1,
15573 _MM_FROUND_CUR_DIRECTION);
15574}
15575
15576extern __inline __mmask16
15577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15578_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15579{
15580 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15581 (__v16sf) __Y, _CMP_EQ_OQ,
15582 (__mmask16) __U,
15583 _MM_FROUND_CUR_DIRECTION);
15584}
15585
15586extern __inline __mmask16
15587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15588_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15589{
15590 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15591 (__v16sf) __Y, _CMP_LT_OS,
15592 (__mmask16) -1,
15593 _MM_FROUND_CUR_DIRECTION);
15594}
15595
15596extern __inline __mmask16
15597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15598_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15599{
15600 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15601 (__v16sf) __Y, _CMP_LT_OS,
15602 (__mmask16) __U,
15603 _MM_FROUND_CUR_DIRECTION);
15604}
15605
15606extern __inline __mmask16
15607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15608_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15609{
15610 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15611 (__v16sf) __Y, _CMP_LE_OS,
15612 (__mmask16) -1,
15613 _MM_FROUND_CUR_DIRECTION);
15614}
15615
15616extern __inline __mmask16
15617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15618_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15619{
15620 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15621 (__v16sf) __Y, _CMP_LE_OS,
15622 (__mmask16) __U,
15623 _MM_FROUND_CUR_DIRECTION);
15624}
15625
15626extern __inline __mmask16
15627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15628_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15629{
15630 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15631 (__v16sf) __Y, _CMP_UNORD_Q,
15632 (__mmask16) -1,
15633 _MM_FROUND_CUR_DIRECTION);
15634}
15635
15636extern __inline __mmask16
15637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15638_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15639{
15640 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15641 (__v16sf) __Y, _CMP_UNORD_Q,
15642 (__mmask16) __U,
15643 _MM_FROUND_CUR_DIRECTION);
15644}
15645
15646extern __inline __mmask16
15647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15648_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15649{
15650 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15651 (__v16sf) __Y, _CMP_NEQ_UQ,
15652 (__mmask16) -1,
15653 _MM_FROUND_CUR_DIRECTION);
15654}
15655
15656extern __inline __mmask16
15657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15658_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15659{
15660 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15661 (__v16sf) __Y, _CMP_NEQ_UQ,
15662 (__mmask16) __U,
15663 _MM_FROUND_CUR_DIRECTION);
15664}
15665
15666extern __inline __mmask16
15667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15668_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15669{
15670 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15671 (__v16sf) __Y, _CMP_NLT_US,
15672 (__mmask16) -1,
15673 _MM_FROUND_CUR_DIRECTION);
15674}
15675
15676extern __inline __mmask16
15677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15678_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15679{
15680 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15681 (__v16sf) __Y, _CMP_NLT_US,
15682 (__mmask16) __U,
15683 _MM_FROUND_CUR_DIRECTION);
15684}
15685
15686extern __inline __mmask16
15687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15688_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15689{
15690 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15691 (__v16sf) __Y, _CMP_NLE_US,
15692 (__mmask16) -1,
15693 _MM_FROUND_CUR_DIRECTION);
15694}
15695
15696extern __inline __mmask16
15697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15698_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15699{
15700 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15701 (__v16sf) __Y, _CMP_NLE_US,
15702 (__mmask16) __U,
15703 _MM_FROUND_CUR_DIRECTION);
15704}
15705
15706extern __inline __mmask16
15707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15708_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15709{
15710 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15711 (__v16sf) __Y, _CMP_ORD_Q,
15712 (__mmask16) -1,
15713 _MM_FROUND_CUR_DIRECTION);
15714}
15715
15716extern __inline __mmask16
15717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15718_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15719{
15720 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15721 (__v16sf) __Y, _CMP_ORD_Q,
15722 (__mmask16) __U,
15723 _MM_FROUND_CUR_DIRECTION);
15724}
15725
2196a885
KY
15726extern __inline __mmask16
15727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15728_mm512_kmov (__mmask16 __A)
15729{
7cdb6e4c 15730 return __builtin_ia32_kmovw (__A);
2196a885
KY
15731}
15732
275be1da
IT
15733extern __inline __m512
15734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15735_mm512_castpd_ps (__m512d __A)
15736{
15737 return (__m512) (__A);
15738}
15739
15740extern __inline __m512i
15741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15742_mm512_castpd_si512 (__m512d __A)
15743{
15744 return (__m512i) (__A);
15745}
15746
15747extern __inline __m512d
15748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15749_mm512_castps_pd (__m512 __A)
15750{
15751 return (__m512d) (__A);
15752}
15753
15754extern __inline __m512i
15755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15756_mm512_castps_si512 (__m512 __A)
15757{
15758 return (__m512i) (__A);
15759}
15760
15761extern __inline __m512
15762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15763_mm512_castsi512_ps (__m512i __A)
15764{
15765 return (__m512) (__A);
15766}
15767
15768extern __inline __m512d
15769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15770_mm512_castsi512_pd (__m512i __A)
15771{
15772 return (__m512d) (__A);
15773}
15774
15775extern __inline __m128d
15776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15777_mm512_castpd512_pd128 (__m512d __A)
15778{
15779 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15780}
15781
15782extern __inline __m128
15783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15784_mm512_castps512_ps128 (__m512 __A)
15785{
15786 return _mm512_extractf32x4_ps(__A, 0);
15787}
15788
15789extern __inline __m128i
15790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15791_mm512_castsi512_si128 (__m512i __A)
15792{
15793 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15794}
15795
15796extern __inline __m256d
15797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15798_mm512_castpd512_pd256 (__m512d __A)
15799{
15800 return _mm512_extractf64x4_pd(__A, 0);
15801}
15802
15803extern __inline __m256
15804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15805_mm512_castps512_ps256 (__m512 __A)
15806{
15807 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15808}
15809
15810extern __inline __m256i
15811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15812_mm512_castsi512_si256 (__m512i __A)
15813{
15814 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15815}
15816
15817extern __inline __m512d
15818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15819_mm512_castpd128_pd512 (__m128d __A)
15820{
15821 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
15822}
15823
15824extern __inline __m512
15825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15826_mm512_castps128_ps512 (__m128 __A)
15827{
15828 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
15829}
15830
15831extern __inline __m512i
15832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15833_mm512_castsi128_si512 (__m128i __A)
15834{
15835 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
15836}
15837
15838extern __inline __m512d
15839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15840_mm512_castpd256_pd512 (__m256d __A)
15841{
15842 return __builtin_ia32_pd512_256pd (__A);
15843}
15844
15845extern __inline __m512
15846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15847_mm512_castps256_ps512 (__m256 __A)
15848{
15849 return __builtin_ia32_ps512_256ps (__A);
15850}
15851
15852extern __inline __m512i
15853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15854_mm512_castsi256_si512 (__m256i __A)
15855{
15856 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
15857}
15858
e6b2dc24
JJ
15859extern __inline __m512d
15860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15861_mm512_zextpd128_pd512 (__m128d __A)
15862{
15863 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
15864}
15865
15866extern __inline __m512
15867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15868_mm512_zextps128_ps512 (__m128 __A)
15869{
15870 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
15871}
15872
15873extern __inline __m512i
15874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15875_mm512_zextsi128_si512 (__m128i __A)
15876{
15877 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
15878}
15879
15880extern __inline __m512d
15881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15882_mm512_zextpd256_pd512 (__m256d __A)
15883{
15884 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
15885}
15886
15887extern __inline __m512
15888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15889_mm512_zextps256_ps512 (__m256 __A)
15890{
15891 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
15892}
15893
15894extern __inline __m512i
15895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15896_mm512_zextsi256_si512 (__m256i __A)
15897{
15898 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
15899}
15900
275be1da
IT
15901extern __inline __mmask16
15902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15903_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
15904{
15905 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15906 (__v16si) __B, 0,
15907 (__mmask16) -1);
15908}
15909
15910extern __inline __mmask16
15911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15912_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15913{
15914 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15915 (__v16si) __B, 0, __U);
15916}
15917
15918extern __inline __mmask8
15919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15920_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15921{
15922 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15923 (__v8di) __B, 0, __U);
15924}
15925
15926extern __inline __mmask8
15927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15928_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
15929{
15930 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15931 (__v8di) __B, 0,
15932 (__mmask8) -1);
15933}
15934
15935extern __inline __mmask16
15936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15937_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
15938{
15939 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15940 (__v16si) __B, 6,
15941 (__mmask16) -1);
15942}
15943
15944extern __inline __mmask16
15945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15946_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15947{
15948 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15949 (__v16si) __B, 6, __U);
15950}
15951
15952extern __inline __mmask8
15953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15954_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15955{
15956 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15957 (__v8di) __B, 6, __U);
15958}
15959
15960extern __inline __mmask8
15961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15962_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
15963{
15964 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15965 (__v8di) __B, 6,
15966 (__mmask8) -1);
15967}
15968
167a5b77
JJ
15969#undef __MM512_REDUCE_OP
15970#define __MM512_REDUCE_OP(op) \
15971 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
15972 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
15973 __m256i __T3 = (__m256i) (__T1 op __T2); \
15974 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
15975 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
15976 __v4si __T6 = __T4 op __T5; \
15977 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15978 __v4si __T8 = __T6 op __T7; \
15979 return __T8[0] op __T8[1]
15980
15981extern __inline int
15982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15983_mm512_reduce_add_epi32 (__m512i __A)
15984{
15985 __MM512_REDUCE_OP (+);
15986}
15987
15988extern __inline int
15989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15990_mm512_reduce_mul_epi32 (__m512i __A)
15991{
15992 __MM512_REDUCE_OP (*);
15993}
15994
15995extern __inline int
15996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15997_mm512_reduce_and_epi32 (__m512i __A)
15998{
15999 __MM512_REDUCE_OP (&);
16000}
16001
16002extern __inline int
16003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16004_mm512_reduce_or_epi32 (__m512i __A)
16005{
16006 __MM512_REDUCE_OP (|);
16007}
16008
16009extern __inline int
16010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16011_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
16012{
16013 __A = _mm512_maskz_mov_epi32 (__U, __A);
16014 __MM512_REDUCE_OP (+);
16015}
16016
16017extern __inline int
16018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16019_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
16020{
16021 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
16022 __MM512_REDUCE_OP (*);
16023}
16024
16025extern __inline int
16026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16027_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
16028{
16029 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16030 __MM512_REDUCE_OP (&);
16031}
16032
16033extern __inline int
16034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16035_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
16036{
16037 __A = _mm512_maskz_mov_epi32 (__U, __A);
16038 __MM512_REDUCE_OP (|);
16039}
16040
16041#undef __MM512_REDUCE_OP
16042#define __MM512_REDUCE_OP(op) \
16043 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
16044 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
16045 __m256i __T3 = _mm256_##op (__T1, __T2); \
16046 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
16047 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
16048 __m128i __T6 = _mm_##op (__T4, __T5); \
16049 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
16050 (__v4si) { 2, 3, 0, 1 }); \
16051 __m128i __T8 = _mm_##op (__T6, __T7); \
16052 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
16053 (__v4si) { 1, 0, 1, 0 }); \
16054 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
16055 return __T10[0]
16056
16057extern __inline int
16058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16059_mm512_reduce_min_epi32 (__m512i __A)
16060{
16061 __MM512_REDUCE_OP (min_epi32);
16062}
16063
16064extern __inline int
16065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16066_mm512_reduce_max_epi32 (__m512i __A)
16067{
16068 __MM512_REDUCE_OP (max_epi32);
16069}
16070
16071extern __inline unsigned int
16072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16073_mm512_reduce_min_epu32 (__m512i __A)
16074{
16075 __MM512_REDUCE_OP (min_epu32);
16076}
16077
16078extern __inline unsigned int
16079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16080_mm512_reduce_max_epu32 (__m512i __A)
16081{
16082 __MM512_REDUCE_OP (max_epu32);
16083}
16084
16085extern __inline int
16086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16087_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
16088{
16089 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
16090 __MM512_REDUCE_OP (min_epi32);
16091}
16092
16093extern __inline int
16094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16095_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
16096{
16097 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
16098 __MM512_REDUCE_OP (max_epi32);
16099}
16100
16101extern __inline unsigned int
16102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16103_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
16104{
16105 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16106 __MM512_REDUCE_OP (min_epu32);
16107}
16108
16109extern __inline unsigned int
16110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16111_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
16112{
16113 __A = _mm512_maskz_mov_epi32 (__U, __A);
16114 __MM512_REDUCE_OP (max_epu32);
16115}
16116
16117#undef __MM512_REDUCE_OP
16118#define __MM512_REDUCE_OP(op) \
16119 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
16120 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
16121 __m256 __T3 = __T1 op __T2; \
16122 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
16123 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
16124 __m128 __T6 = __T4 op __T5; \
16125 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
16126 __m128 __T8 = __T6 op __T7; \
16127 return __T8[0] op __T8[1]
16128
16129extern __inline float
16130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16131_mm512_reduce_add_ps (__m512 __A)
16132{
16133 __MM512_REDUCE_OP (+);
16134}
16135
16136extern __inline float
16137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16138_mm512_reduce_mul_ps (__m512 __A)
16139{
16140 __MM512_REDUCE_OP (*);
16141}
16142
16143extern __inline float
16144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16145_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
16146{
16147 __A = _mm512_maskz_mov_ps (__U, __A);
16148 __MM512_REDUCE_OP (+);
16149}
16150
16151extern __inline float
16152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16153_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
16154{
16155 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
16156 __MM512_REDUCE_OP (*);
16157}
16158
16159#undef __MM512_REDUCE_OP
16160#define __MM512_REDUCE_OP(op) \
16161 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
16162 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
16163 __m256 __T3 = _mm256_##op (__T1, __T2); \
16164 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
16165 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
16166 __m128 __T6 = _mm_##op (__T4, __T5); \
16167 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
16168 __m128 __T8 = _mm_##op (__T6, __T7); \
16169 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
16170 __m128 __T10 = _mm_##op (__T8, __T9); \
16171 return __T10[0]
16172
16173extern __inline float
16174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16175_mm512_reduce_min_ps (__m512 __A)
16176{
16177 __MM512_REDUCE_OP (min_ps);
16178}
16179
16180extern __inline float
16181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16182_mm512_reduce_max_ps (__m512 __A)
16183{
16184 __MM512_REDUCE_OP (max_ps);
16185}
16186
16187extern __inline float
16188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16189_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
16190{
16191 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
16192 __MM512_REDUCE_OP (min_ps);
16193}
16194
16195extern __inline float
16196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16197_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
16198{
16199 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
16200 __MM512_REDUCE_OP (max_ps);
16201}
16202
16203#undef __MM512_REDUCE_OP
16204#define __MM512_REDUCE_OP(op) \
16205 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
16206 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
16207 __m256i __T3 = (__m256i) (__T1 op __T2); \
16208 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
16209 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
16210 __v2di __T6 = __T4 op __T5; \
16211 return __T6[0] op __T6[1]
16212
16213extern __inline long long
16214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16215_mm512_reduce_add_epi64 (__m512i __A)
16216{
16217 __MM512_REDUCE_OP (+);
16218}
16219
16220extern __inline long long
16221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16222_mm512_reduce_mul_epi64 (__m512i __A)
16223{
16224 __MM512_REDUCE_OP (*);
16225}
16226
16227extern __inline long long
16228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16229_mm512_reduce_and_epi64 (__m512i __A)
16230{
16231 __MM512_REDUCE_OP (&);
16232}
16233
16234extern __inline long long
16235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16236_mm512_reduce_or_epi64 (__m512i __A)
16237{
16238 __MM512_REDUCE_OP (|);
16239}
16240
16241extern __inline long long
16242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16243_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
16244{
16245 __A = _mm512_maskz_mov_epi64 (__U, __A);
16246 __MM512_REDUCE_OP (+);
16247}
16248
16249extern __inline long long
16250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16251_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
16252{
16253 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
16254 __MM512_REDUCE_OP (*);
16255}
16256
16257extern __inline long long
16258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16259_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
16260{
16261 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16262 __MM512_REDUCE_OP (&);
16263}
16264
16265extern __inline long long
16266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16267_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
16268{
16269 __A = _mm512_maskz_mov_epi64 (__U, __A);
16270 __MM512_REDUCE_OP (|);
16271}
16272
16273#undef __MM512_REDUCE_OP
16274#define __MM512_REDUCE_OP(op) \
16275 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
16276 __m512i __T2 = _mm512_##op (__A, __T1); \
16277 __m512i __T3 \
16278 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
16279 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
16280 __m512i __T4 = _mm512_##op (__T2, __T3); \
16281 __m512i __T5 \
16282 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
16283 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
16284 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
16285 return __T6[0]
16286
16287extern __inline long long
16288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16289_mm512_reduce_min_epi64 (__m512i __A)
16290{
16291 __MM512_REDUCE_OP (min_epi64);
16292}
16293
16294extern __inline long long
16295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16296_mm512_reduce_max_epi64 (__m512i __A)
16297{
16298 __MM512_REDUCE_OP (max_epi64);
16299}
16300
16301extern __inline long long
16302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16303_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
16304{
16305 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
16306 __U, __A);
16307 __MM512_REDUCE_OP (min_epi64);
16308}
16309
16310extern __inline long long
16311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16312_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
16313{
16314 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
16315 __U, __A);
16316 __MM512_REDUCE_OP (max_epi64);
16317}
16318
16319extern __inline unsigned long long
16320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16321_mm512_reduce_min_epu64 (__m512i __A)
16322{
16323 __MM512_REDUCE_OP (min_epu64);
16324}
16325
16326extern __inline unsigned long long
16327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16328_mm512_reduce_max_epu64 (__m512i __A)
16329{
16330 __MM512_REDUCE_OP (max_epu64);
16331}
16332
16333extern __inline unsigned long long
16334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16335_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
16336{
16337 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16338 __MM512_REDUCE_OP (min_epu64);
16339}
16340
16341extern __inline unsigned long long
16342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16343_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
16344{
16345 __A = _mm512_maskz_mov_epi64 (__U, __A);
16346 __MM512_REDUCE_OP (max_epu64);
16347}
16348
16349#undef __MM512_REDUCE_OP
16350#define __MM512_REDUCE_OP(op) \
16351 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16352 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16353 __m256d __T3 = __T1 op __T2; \
16354 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16355 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16356 __m128d __T6 = __T4 op __T5; \
16357 return __T6[0] op __T6[1]
16358
16359extern __inline double
16360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16361_mm512_reduce_add_pd (__m512d __A)
16362{
16363 __MM512_REDUCE_OP (+);
16364}
16365
16366extern __inline double
16367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16368_mm512_reduce_mul_pd (__m512d __A)
16369{
16370 __MM512_REDUCE_OP (*);
16371}
16372
16373extern __inline double
16374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16375_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
16376{
16377 __A = _mm512_maskz_mov_pd (__U, __A);
16378 __MM512_REDUCE_OP (+);
16379}
16380
16381extern __inline double
16382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16383_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
16384{
16385 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
16386 __MM512_REDUCE_OP (*);
16387}
16388
16389#undef __MM512_REDUCE_OP
16390#define __MM512_REDUCE_OP(op) \
16391 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16392 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16393 __m256d __T3 = _mm256_##op (__T1, __T2); \
16394 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16395 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16396 __m128d __T6 = _mm_##op (__T4, __T5); \
16397 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
16398 __m128d __T8 = _mm_##op (__T6, __T7); \
16399 return __T8[0]
16400
16401extern __inline double
16402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16403_mm512_reduce_min_pd (__m512d __A)
16404{
16405 __MM512_REDUCE_OP (min_pd);
16406}
16407
16408extern __inline double
16409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16410_mm512_reduce_max_pd (__m512d __A)
16411{
16412 __MM512_REDUCE_OP (max_pd);
16413}
16414
16415extern __inline double
16416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16417_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
16418{
16419 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
16420 __MM512_REDUCE_OP (min_pd);
16421}
16422
16423extern __inline double
16424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16425_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
16426{
16427 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
16428 __MM512_REDUCE_OP (max_pd);
16429}
16430
16431#undef __MM512_REDUCE_OP
16432
756c5857
AI
16433#ifdef __DISABLE_AVX512F__
16434#undef __DISABLE_AVX512F__
16435#pragma GCC pop_options
16436#endif /* __DISABLE_AVX512F__ */
16437
16438#endif /* _AVX512FINTRIN_H_INCLUDED */