]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
Initial support for -mevex512
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
83ffe9cd 1/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal data types for implementing the intrinsics.  Every type is a
   64-byte GCC vector; they differ only in element type.  */

/* Floating-point element types.  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

/* Integer element types, signed then unsigned for each element type.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 512-bit vector types.  The Intel API is flexible enough that
   these must be allowed to alias other vector types and their scalar
   components, hence __may_alias__ on each of them.  */
typedef float __m512
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d
  __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i
  __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Unaligned versions of the public types: same element type, size and
   aliasing rules, but with the alignment requirement lowered to 1.  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Write-mask types used by the masked intrinsics below: __mmask16 for
   the 16-element (32-bit) forms, __mmask8 for the 8-element (64-bit)
   forms.  */
typedef unsigned short __mmask16;
typedef unsigned char __mmask8;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
4e6a811f
JJ
100extern __inline __m512i
101__attribute__((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
103 short __q27, short __q26, short __q25, short __q24,
104 short __q23, short __q22, short __q21, short __q20,
105 short __q19, short __q18, short __q17, short __q16,
106 short __q15, short __q14, short __q13, short __q12,
107 short __q11, short __q10, short __q09, short __q08,
108 short __q07, short __q06, short __q05, short __q04,
109 short __q03, short __q02, short __q01, short __q00)
110{
111 return __extension__ (__m512i)(__v32hi){
112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
116 };
117}
118
119extern __inline __m512i
120__attribute__((__gnu_inline__, __always_inline__, __artificial__))
121_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
122 char __q59, char __q58, char __q57, char __q56,
123 char __q55, char __q54, char __q53, char __q52,
124 char __q51, char __q50, char __q49, char __q48,
125 char __q47, char __q46, char __q45, char __q44,
126 char __q43, char __q42, char __q41, char __q40,
127 char __q39, char __q38, char __q37, char __q36,
128 char __q35, char __q34, char __q33, char __q32,
129 char __q31, char __q30, char __q29, char __q28,
130 char __q27, char __q26, char __q25, char __q24,
131 char __q23, char __q22, char __q21, char __q20,
132 char __q19, char __q18, char __q17, char __q16,
133 char __q15, char __q14, char __q13, char __q12,
134 char __q11, char __q10, char __q09, char __q08,
135 char __q07, char __q06, char __q05, char __q04,
136 char __q03, char __q02, char __q01, char __q00)
137{
138 return __extension__ (__m512i)(__v64qi){
139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
147 };
148}
149
756c5857
AI
150extern __inline __m512d
151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152_mm512_set_pd (double __A, double __B, double __C, double __D,
153 double __E, double __F, double __G, double __H)
154{
155 return __extension__ (__m512d)
156 { __H, __G, __F, __E, __D, __C, __B, __A };
157}
158
159extern __inline __m512
160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161_mm512_set_ps (float __A, float __B, float __C, float __D,
162 float __E, float __F, float __G, float __H,
163 float __I, float __J, float __K, float __L,
164 float __M, float __N, float __O, float __P)
165{
166 return __extension__ (__m512)
167 { __P, __O, __N, __M, __L, __K, __J, __I,
168 __H, __G, __F, __E, __D, __C, __B, __A };
169}
170
/* Reversed-argument form of _mm512_set_epi64: the eight arguments are
   forwarded in the opposite order.  */
#define _mm512_setr_epi64(__e0, __e1, __e2, __e3,			      \
			  __e4, __e5, __e6, __e7)			      \
  _mm512_set_epi64 (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
173
/* Reversed-argument form of _mm512_set_epi32: the sixteen arguments are
   forwarded in the opposite order.  */
#define _mm512_setr_epi32(__e0, __e1, __e2, __e3,			      \
			  __e4, __e5, __e6, __e7,			      \
			  __e8, __e9, __e10, __e11,			      \
			  __e12, __e13, __e14, __e15)			      \
  _mm512_set_epi32 (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8,    \
		    __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
177
/* Reversed-argument form of _mm512_set_pd: the eight arguments are
   forwarded in the opposite order.  */
#define _mm512_setr_pd(__e0, __e1, __e2, __e3,				      \
		       __e4, __e5, __e6, __e7)				      \
  _mm512_set_pd (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
180
/* Reversed-argument form of _mm512_set_ps: the sixteen arguments are
   forwarded in the opposite order.  */
#define _mm512_setr_ps(__e0, __e1, __e2, __e3,				      \
		       __e4, __e5, __e6, __e7,				      \
		       __e8, __e9, __e10, __e11,			      \
		       __e12, __e13, __e14, __e15)			      \
  _mm512_set_ps (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8,	      \
		 __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
183
0b192937
UD
184extern __inline __m512
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm512_undefined_ps (void)
187{
6b0907b4
JJ
188#pragma GCC diagnostic push
189#pragma GCC diagnostic ignored "-Winit-self"
0b192937 190 __m512 __Y = __Y;
6b0907b4 191#pragma GCC diagnostic pop
0b192937
UD
192 return __Y;
193}
194
dcb2c527
JJ
/* Type-less spelling: an alias for the float variant.  */
#define _mm512_undefined _mm512_undefined_ps
196
0b192937
UD
197extern __inline __m512d
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm512_undefined_pd (void)
200{
6b0907b4
JJ
201#pragma GCC diagnostic push
202#pragma GCC diagnostic ignored "-Winit-self"
0b192937 203 __m512d __Y = __Y;
6b0907b4 204#pragma GCC diagnostic pop
0b192937
UD
205 return __Y;
206}
207
208extern __inline __m512i
209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 210_mm512_undefined_epi32 (void)
0b192937 211{
6b0907b4
JJ
212#pragma GCC diagnostic push
213#pragma GCC diagnostic ignored "-Winit-self"
0b192937 214 __m512i __Y = __Y;
6b0907b4 215#pragma GCC diagnostic pop
0b192937
UD
216 return __Y;
217}
218
4271e5cb
UB
/* Whole-register spelling: an alias for the epi32 variant.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
220
7d9088c2
UD
221extern __inline __m512i
222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223_mm512_set1_epi8 (char __A)
224{
225 return __extension__ (__m512i)(__v64qi)
226 { __A, __A, __A, __A, __A, __A, __A, __A,
227 __A, __A, __A, __A, __A, __A, __A, __A,
228 __A, __A, __A, __A, __A, __A, __A, __A,
229 __A, __A, __A, __A, __A, __A, __A, __A,
230 __A, __A, __A, __A, __A, __A, __A, __A,
231 __A, __A, __A, __A, __A, __A, __A, __A,
232 __A, __A, __A, __A, __A, __A, __A, __A,
233 __A, __A, __A, __A, __A, __A, __A, __A };
234}
235
236extern __inline __m512i
237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238_mm512_set1_epi16 (short __A)
239{
240 return __extension__ (__m512i)(__v32hi)
241 { __A, __A, __A, __A, __A, __A, __A, __A,
242 __A, __A, __A, __A, __A, __A, __A, __A,
243 __A, __A, __A, __A, __A, __A, __A, __A,
244 __A, __A, __A, __A, __A, __A, __A, __A };
245}
246
2b2384e8
UD
247extern __inline __m512d
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm512_set1_pd (double __A)
250{
43373412 251 return __extension__ (__m512d)(__v8df)
252 { __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
253}
254
255extern __inline __m512
256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257_mm512_set1_ps (float __A)
258{
43373412 259 return __extension__ (__m512)(__v16sf)
260 { __A, __A, __A, __A, __A, __A, __A, __A,
261 __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
262}
263
7d9088c2
UD
264/* Create the vector [A B C D A B C D A B C D A B C D]. */
265extern __inline __m512i
266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
268{
269 return __extension__ (__m512i)(__v16si)
270 { __D, __C, __B, __A, __D, __C, __B, __A,
271 __D, __C, __B, __A, __D, __C, __B, __A };
272}
273
274extern __inline __m512i
275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276_mm512_set4_epi64 (long long __A, long long __B, long long __C,
277 long long __D)
278{
279 return __extension__ (__m512i) (__v8di)
280 { __D, __C, __B, __A, __D, __C, __B, __A };
281}
282
283extern __inline __m512d
284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285_mm512_set4_pd (double __A, double __B, double __C, double __D)
286{
287 return __extension__ (__m512d)
288 { __D, __C, __B, __A, __D, __C, __B, __A };
289}
290
291extern __inline __m512
292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
293_mm512_set4_ps (float __A, float __B, float __C, float __D)
294{
295 return __extension__ (__m512)
296 { __D, __C, __B, __A, __D, __C, __B, __A,
297 __D, __C, __B, __A, __D, __C, __B, __A };
298}
299
/* Reversed-argument form of _mm512_set4_epi64.  */
#define _mm512_setr4_epi64(__e0, __e1, __e2, __e3)			      \
  _mm512_set4_epi64 (__e3, __e2, __e1, __e0)
302
/* Reversed-argument form of _mm512_set4_epi32.  */
#define _mm512_setr4_epi32(__e0, __e1, __e2, __e3)			      \
  _mm512_set4_epi32 (__e3, __e2, __e1, __e0)
305
/* Reversed-argument form of _mm512_set4_pd.  */
#define _mm512_setr4_pd(__e0, __e1, __e2, __e3)				      \
  _mm512_set4_pd (__e3, __e2, __e1, __e0)
308
/* Reversed-argument form of _mm512_set4_ps.  */
#define _mm512_setr4_ps(__e0, __e1, __e2, __e3)				      \
  _mm512_set4_ps (__e3, __e2, __e1, __e0)
311
756c5857
AI
312extern __inline __m512
313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
314_mm512_setzero_ps (void)
315{
316 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
317 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
318}
319
4e6a811f
JJ
320extern __inline __m512
321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322_mm512_setzero (void)
323{
324 return _mm512_setzero_ps ();
325}
326
756c5857
AI
327extern __inline __m512d
328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329_mm512_setzero_pd (void)
330{
331 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
332}
333
7d9088c2
UD
334extern __inline __m512i
335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336_mm512_setzero_epi32 (void)
337{
338 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
339}
340
756c5857
AI
341extern __inline __m512i
342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343_mm512_setzero_si512 (void)
344{
345 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
346}
347
348extern __inline __m512d
349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
350_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
351{
352 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
353 (__v8df) __W,
354 (__mmask8) __U);
355}
356
357extern __inline __m512d
358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
360{
361 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
362 (__v8df)
363 _mm512_setzero_pd (),
364 (__mmask8) __U);
365}
366
367extern __inline __m512
368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
370{
371 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
372 (__v16sf) __W,
373 (__mmask16) __U);
374}
375
376extern __inline __m512
377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
379{
380 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
381 (__v16sf)
382 _mm512_setzero_ps (),
383 (__mmask16) __U);
384}
385
386extern __inline __m512d
387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388_mm512_load_pd (void const *__P)
389{
390 return *(__m512d *) __P;
391}
392
393extern __inline __m512d
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
396{
397 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
398 (__v8df) __W,
399 (__mmask8) __U);
400}
401
402extern __inline __m512d
403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
404_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
405{
406 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
407 (__v8df)
408 _mm512_setzero_pd (),
409 (__mmask8) __U);
410}
411
412extern __inline void
413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
414_mm512_store_pd (void *__P, __m512d __A)
415{
416 *(__m512d *) __P = __A;
417}
418
419extern __inline void
420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
422{
423 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
424 (__mmask8) __U);
425}
426
427extern __inline __m512
428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
429_mm512_load_ps (void const *__P)
430{
431 return *(__m512 *) __P;
432}
433
434extern __inline __m512
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
437{
438 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
439 (__v16sf) __W,
440 (__mmask16) __U);
441}
442
443extern __inline __m512
444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
445_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
446{
447 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
448 (__v16sf)
449 _mm512_setzero_ps (),
450 (__mmask16) __U);
451}
452
453extern __inline void
454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455_mm512_store_ps (void *__P, __m512 __A)
456{
457 *(__m512 *) __P = __A;
458}
459
460extern __inline void
461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
462_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
463{
464 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
465 (__mmask16) __U);
466}
467
468extern __inline __m512i
469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
471{
472 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
473 (__v8di) __W,
474 (__mmask8) __U);
475}
476
477extern __inline __m512i
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
480{
481 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
482 (__v8di)
483 _mm512_setzero_si512 (),
484 (__mmask8) __U);
485}
486
487extern __inline __m512i
488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
489_mm512_load_epi64 (void const *__P)
490{
491 return *(__m512i *) __P;
492}
493
494extern __inline __m512i
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
497{
498 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
499 (__v8di) __W,
500 (__mmask8) __U);
501}
502
503extern __inline __m512i
504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
506{
507 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
508 (__v8di)
509 _mm512_setzero_si512 (),
510 (__mmask8) __U);
511}
512
513extern __inline void
514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
515_mm512_store_epi64 (void *__P, __m512i __A)
516{
517 *(__m512i *) __P = __A;
518}
519
520extern __inline void
521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
522_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
523{
524 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
525 (__mmask8) __U);
526}
527
528extern __inline __m512i
529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
531{
532 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
533 (__v16si) __W,
534 (__mmask16) __U);
535}
536
537extern __inline __m512i
538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
540{
541 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
542 (__v16si)
543 _mm512_setzero_si512 (),
544 (__mmask16) __U);
545}
546
547extern __inline __m512i
548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549_mm512_load_si512 (void const *__P)
550{
551 return *(__m512i *) __P;
552}
553
554extern __inline __m512i
555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
556_mm512_load_epi32 (void const *__P)
557{
558 return *(__m512i *) __P;
559}
560
561extern __inline __m512i
562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
563_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
564{
565 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
566 (__v16si) __W,
567 (__mmask16) __U);
568}
569
570extern __inline __m512i
571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
572_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
573{
574 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
575 (__v16si)
576 _mm512_setzero_si512 (),
577 (__mmask16) __U);
578}
579
580extern __inline void
581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582_mm512_store_si512 (void *__P, __m512i __A)
583{
584 *(__m512i *) __P = __A;
585}
586
587extern __inline void
588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
589_mm512_store_epi32 (void *__P, __m512i __A)
590{
591 *(__m512i *) __P = __A;
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
597{
598 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
599 (__mmask16) __U);
600}
601
602extern __inline __m512i
603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
604_mm512_mullo_epi32 (__m512i __A, __m512i __B)
605{
2069d6fc 606 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
607}
608
609extern __inline __m512i
610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
612{
613 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
614 (__v16si) __B,
615 (__v16si)
616 _mm512_setzero_si512 (),
617 __M);
618}
619
620extern __inline __m512i
621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
623{
624 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
625 (__v16si) __B,
626 (__v16si) __W, __M);
627}
628
503ac4e0
JJ
629extern __inline __m512i
630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
631_mm512_mullox_epi64 (__m512i __A, __m512i __B)
632{
633 return (__m512i) ((__v8du) __A * (__v8du) __B);
634}
635
636extern __inline __m512i
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
639{
640 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
641}
642
756c5857
AI
643extern __inline __m512i
644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
646{
647 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si)
4271e5cb 650 _mm512_undefined_epi32 (),
756c5857
AI
651 (__mmask16) -1);
652}
653
654extern __inline __m512i
655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
656_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
657{
658 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
659 (__v16si) __Y,
660 (__v16si) __W,
661 (__mmask16) __U);
662}
663
664extern __inline __m512i
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
667{
668 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
669 (__v16si) __Y,
670 (__v16si)
671 _mm512_setzero_si512 (),
672 (__mmask16) __U);
673}
674
675extern __inline __m512i
676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
677_mm512_srav_epi32 (__m512i __X, __m512i __Y)
678{
679 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
680 (__v16si) __Y,
681 (__v16si)
4271e5cb 682 _mm512_undefined_epi32 (),
756c5857
AI
683 (__mmask16) -1);
684}
685
686extern __inline __m512i
687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
689{
690 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
691 (__v16si) __Y,
692 (__v16si) __W,
693 (__mmask16) __U);
694}
695
696extern __inline __m512i
697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
698_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
699{
700 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
701 (__v16si) __Y,
702 (__v16si)
703 _mm512_setzero_si512 (),
704 (__mmask16) __U);
705}
706
707extern __inline __m512i
708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
709_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
710{
711 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
712 (__v16si) __Y,
713 (__v16si)
4271e5cb 714 _mm512_undefined_epi32 (),
756c5857
AI
715 (__mmask16) -1);
716}
717
718extern __inline __m512i
719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
721{
722 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
723 (__v16si) __Y,
724 (__v16si) __W,
725 (__mmask16) __U);
726}
727
728extern __inline __m512i
729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
731{
732 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
733 (__v16si) __Y,
734 (__v16si)
735 _mm512_setzero_si512 (),
736 (__mmask16) __U);
737}
738
739extern __inline __m512i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm512_add_epi64 (__m512i __A, __m512i __B)
742{
2069d6fc 743 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
744}
745
746extern __inline __m512i
747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
749{
750 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
751 (__v8di) __B,
752 (__v8di) __W,
753 (__mmask8) __U);
754}
755
756extern __inline __m512i
757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
759{
760 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
761 (__v8di) __B,
762 (__v8di)
763 _mm512_setzero_si512 (),
764 (__mmask8) __U);
765}
766
767extern __inline __m512i
768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769_mm512_sub_epi64 (__m512i __A, __m512i __B)
770{
2069d6fc 771 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
772}
773
774extern __inline __m512i
775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
777{
778 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
779 (__v8di) __B,
780 (__v8di) __W,
781 (__mmask8) __U);
782}
783
784extern __inline __m512i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
787{
788 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
789 (__v8di) __B,
790 (__v8di)
791 _mm512_setzero_si512 (),
792 (__mmask8) __U);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
798{
799 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di)
0b192937 802 _mm512_undefined_pd (),
756c5857
AI
803 (__mmask8) -1);
804}
805
806extern __inline __m512i
807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
808_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
809{
810 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
811 (__v8di) __Y,
812 (__v8di) __W,
813 (__mmask8) __U);
814}
815
816extern __inline __m512i
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
819{
820 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
821 (__v8di) __Y,
822 (__v8di)
823 _mm512_setzero_si512 (),
824 (__mmask8) __U);
825}
826
827extern __inline __m512i
828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829_mm512_srav_epi64 (__m512i __X, __m512i __Y)
830{
831 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
832 (__v8di) __Y,
833 (__v8di)
4271e5cb 834 _mm512_undefined_epi32 (),
756c5857
AI
835 (__mmask8) -1);
836}
837
838extern __inline __m512i
839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
840_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
841{
842 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
843 (__v8di) __Y,
844 (__v8di) __W,
845 (__mmask8) __U);
846}
847
848extern __inline __m512i
849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
851{
852 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
853 (__v8di) __Y,
854 (__v8di)
855 _mm512_setzero_si512 (),
856 (__mmask8) __U);
857}
858
859extern __inline __m512i
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
862{
863 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
864 (__v8di) __Y,
865 (__v8di)
4271e5cb 866 _mm512_undefined_epi32 (),
756c5857
AI
867 (__mmask8) -1);
868}
869
870extern __inline __m512i
871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
873{
874 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
875 (__v8di) __Y,
876 (__v8di) __W,
877 (__mmask8) __U);
878}
879
880extern __inline __m512i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
883{
884 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
885 (__v8di) __Y,
886 (__v8di)
887 _mm512_setzero_si512 (),
888 (__mmask8) __U);
889}
890
891extern __inline __m512i
892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893_mm512_add_epi32 (__m512i __A, __m512i __B)
894{
2069d6fc 895 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
896}
897
898extern __inline __m512i
899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
901{
902 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
903 (__v16si) __B,
904 (__v16si) __W,
905 (__mmask16) __U);
906}
907
908extern __inline __m512i
909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
910_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911{
912 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
913 (__v16si) __B,
914 (__v16si)
915 _mm512_setzero_si512 (),
916 (__mmask16) __U);
917}
918
919extern __inline __m512i
920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921_mm512_mul_epi32 (__m512i __X, __m512i __Y)
922{
923 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
924 (__v16si) __Y,
925 (__v8di)
4271e5cb 926 _mm512_undefined_epi32 (),
756c5857
AI
927 (__mmask8) -1);
928}
929
930extern __inline __m512i
931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
932_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
933{
934 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
935 (__v16si) __Y,
936 (__v8di) __W, __M);
937}
938
939extern __inline __m512i
940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
942{
943 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
944 (__v16si) __Y,
945 (__v8di)
946 _mm512_setzero_si512 (),
947 __M);
948}
949
950extern __inline __m512i
951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
952_mm512_sub_epi32 (__m512i __A, __m512i __B)
953{
2069d6fc 954 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
955}
956
957extern __inline __m512i
958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
960{
961 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
962 (__v16si) __B,
963 (__v16si) __W,
964 (__mmask16) __U);
965}
966
967extern __inline __m512i
968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
970{
971 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
972 (__v16si) __B,
973 (__v16si)
974 _mm512_setzero_si512 (),
975 (__mmask16) __U);
976}
977
978extern __inline __m512i
979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
980_mm512_mul_epu32 (__m512i __X, __m512i __Y)
981{
982 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
983 (__v16si) __Y,
984 (__v8di)
4271e5cb 985 _mm512_undefined_epi32 (),
756c5857
AI
986 (__mmask8) -1);
987}
988
989extern __inline __m512i
990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
991_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
992{
993 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
994 (__v16si) __Y,
995 (__v8di) __W, __M);
996}
997
998extern __inline __m512i
999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1000_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
1001{
1002 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1003 (__v16si) __Y,
1004 (__v8di)
1005 _mm512_setzero_si512 (),
1006 __M);
1007}
1008
1009#ifdef __OPTIMIZE__
1010extern __inline __m512i
1011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1012_mm512_slli_epi64 (__m512i __A, unsigned int __B)
1013{
1014 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
1015 (__v8di)
4271e5cb 1016 _mm512_undefined_epi32 (),
756c5857
AI
1017 (__mmask8) -1);
1018}
1019
1020extern __inline __m512i
1021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1022_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1023 unsigned int __B)
1024{
1025 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
1026 (__v8di) __W,
1027 (__mmask8) __U);
1028}
1029
1030extern __inline __m512i
1031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1032_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1033{
1034 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
1035 (__v8di)
1036 _mm512_setzero_si512 (),
1037 (__mmask8) __U);
1038}
1039#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_psllqi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
1057#endif
1058
1059extern __inline __m512i
1060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061_mm512_sll_epi64 (__m512i __A, __m128i __B)
1062{
1063 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1064 (__v2di) __B,
1065 (__v8di)
4271e5cb 1066 _mm512_undefined_epi32 (),
756c5857
AI
1067 (__mmask8) -1);
1068}
1069
1070extern __inline __m512i
1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1073{
1074 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1075 (__v2di) __B,
1076 (__v8di) __W,
1077 (__mmask8) __U);
1078}
1079
1080extern __inline __m512i
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1083{
1084 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1085 (__v2di) __B,
1086 (__v8di)
1087 _mm512_setzero_si512 (),
1088 (__mmask8) __U);
1089}
1090
1091#ifdef __OPTIMIZE__
1092extern __inline __m512i
1093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1094_mm512_srli_epi64 (__m512i __A, unsigned int __B)
1095{
1096 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1097 (__v8di)
4271e5cb 1098 _mm512_undefined_epi32 (),
756c5857
AI
1099 (__mmask8) -1);
1100}
1101
1102extern __inline __m512i
1103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1104_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1105 __m512i __A, unsigned int __B)
1106{
1107 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1108 (__v8di) __W,
1109 (__mmask8) __U);
1110}
1111
1112extern __inline __m512i
1113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1114_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1115{
1116 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1117 (__v8di)
1118 _mm512_setzero_si512 (),
1119 (__mmask8) __U);
1120}
1121#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_psrlqi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
1139#endif
1140
1141extern __inline __m512i
1142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1143_mm512_srl_epi64 (__m512i __A, __m128i __B)
1144{
1145 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1146 (__v2di) __B,
1147 (__v8di)
4271e5cb 1148 _mm512_undefined_epi32 (),
756c5857
AI
1149 (__mmask8) -1);
1150}
1151
1152extern __inline __m512i
1153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1155{
1156 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1157 (__v2di) __B,
1158 (__v8di) __W,
1159 (__mmask8) __U);
1160}
1161
1162extern __inline __m512i
1163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1164_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1165{
1166 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1167 (__v2di) __B,
1168 (__v8di)
1169 _mm512_setzero_si512 (),
1170 (__mmask8) __U);
1171}
1172
1173#ifdef __OPTIMIZE__
1174extern __inline __m512i
1175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1176_mm512_srai_epi64 (__m512i __A, unsigned int __B)
1177{
1178 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1179 (__v8di)
4271e5cb 1180 _mm512_undefined_epi32 (),
756c5857
AI
1181 (__mmask8) -1);
1182}
1183
1184extern __inline __m512i
1185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1187 unsigned int __B)
1188{
1189 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1190 (__v8di) __W,
1191 (__mmask8) __U);
1192}
1193
1194extern __inline __m512i
1195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1197{
1198 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1199 (__v8di)
1200 _mm512_setzero_si512 (),
1201 (__mmask8) __U);
1202}
1203#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_psraqi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), \
     (unsigned int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
1221#endif
1222
1223extern __inline __m512i
1224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225_mm512_sra_epi64 (__m512i __A, __m128i __B)
1226{
1227 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1228 (__v2di) __B,
1229 (__v8di)
4271e5cb 1230 _mm512_undefined_epi32 (),
756c5857
AI
1231 (__mmask8) -1);
1232}
1233
1234extern __inline __m512i
1235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1237{
1238 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1239 (__v2di) __B,
1240 (__v8di) __W,
1241 (__mmask8) __U);
1242}
1243
1244extern __inline __m512i
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1247{
1248 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1249 (__v2di) __B,
1250 (__v8di)
1251 _mm512_setzero_si512 (),
1252 (__mmask8) __U);
1253}
1254
1255#ifdef __OPTIMIZE__
1256extern __inline __m512i
1257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1258_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1259{
1260 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1261 (__v16si)
4271e5cb 1262 _mm512_undefined_epi32 (),
756c5857
AI
1263 (__mmask16) -1);
1264}
1265
1266extern __inline __m512i
1267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1268_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1269 unsigned int __B)
1270{
1271 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1272 (__v16si) __W,
1273 (__mmask16) __U);
1274}
1275
1276extern __inline __m512i
1277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1279{
1280 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1281 (__v16si)
1282 _mm512_setzero_si512 (),
1283 (__mmask16) __U);
1284}
1285#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_pslldi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
1303#endif
1304
1305extern __inline __m512i
1306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307_mm512_sll_epi32 (__m512i __A, __m128i __B)
1308{
1309 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1310 (__v4si) __B,
1311 (__v16si)
4271e5cb 1312 _mm512_undefined_epi32 (),
756c5857
AI
1313 (__mmask16) -1);
1314}
1315
1316extern __inline __m512i
1317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1318_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1319{
1320 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1321 (__v4si) __B,
1322 (__v16si) __W,
1323 (__mmask16) __U);
1324}
1325
1326extern __inline __m512i
1327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1328_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1329{
1330 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1331 (__v4si) __B,
1332 (__v16si)
1333 _mm512_setzero_si512 (),
1334 (__mmask16) __U);
1335}
1336
1337#ifdef __OPTIMIZE__
1338extern __inline __m512i
1339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1341{
1342 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1343 (__v16si)
4271e5cb 1344 _mm512_undefined_epi32 (),
756c5857
AI
1345 (__mmask16) -1);
1346}
1347
1348extern __inline __m512i
1349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1351 __m512i __A, unsigned int __B)
1352{
1353 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1354 (__v16si) __W,
1355 (__mmask16) __U);
1356}
1357
1358extern __inline __m512i
1359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1361{
1362 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1363 (__v16si)
1364 _mm512_setzero_si512 (),
1365 (__mmask16) __U);
1366}
1367#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_psrldi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
1385#endif
1386
1387extern __inline __m512i
1388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389_mm512_srl_epi32 (__m512i __A, __m128i __B)
1390{
1391 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si)
4271e5cb 1394 _mm512_undefined_epi32 (),
756c5857
AI
1395 (__mmask16) -1);
1396}
1397
1398extern __inline __m512i
1399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1401{
1402 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1403 (__v4si) __B,
1404 (__v16si) __W,
1405 (__mmask16) __U);
1406}
1407
1408extern __inline __m512i
1409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1411{
1412 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1413 (__v4si) __B,
1414 (__v16si)
1415 _mm512_setzero_si512 (),
1416 (__mmask16) __U);
1417}
1418
1419#ifdef __OPTIMIZE__
1420extern __inline __m512i
1421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1423{
1424 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1425 (__v16si)
4271e5cb 1426 _mm512_undefined_epi32 (),
756c5857
AI
1427 (__mmask16) -1);
1428}
1429
1430extern __inline __m512i
1431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1433 unsigned int __B)
1434{
1435 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1436 (__v16si) __W,
1437 (__mmask16) __U);
1438}
1439
1440extern __inline __m512i
1441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1442_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1443{
1444 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1445 (__v16si)
1446 _mm512_setzero_si512 (),
1447 (__mmask16) __U);
1448}
1449#else
4a84a2db
HL
/* Macro forms of the three __builtin_ia32_psradi512_mask wrappers, used
   when __OPTIMIZE__ is not defined.
   NOTE(review): presumably macros are used here so that C reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), \
     (unsigned int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
1467#endif
1468
1469extern __inline __m512i
1470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471_mm512_sra_epi32 (__m512i __A, __m128i __B)
1472{
1473 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1474 (__v4si) __B,
1475 (__v16si)
4271e5cb 1476 _mm512_undefined_epi32 (),
756c5857
AI
1477 (__mmask16) -1);
1478}
1479
1480extern __inline __m512i
1481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1482_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1483{
1484 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1485 (__v4si) __B,
1486 (__v16si) __W,
1487 (__mmask16) __U);
1488}
1489
1490extern __inline __m512i
1491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1493{
1494 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1495 (__v4si) __B,
1496 (__v16si)
1497 _mm512_setzero_si512 (),
1498 (__mmask16) __U);
1499}
1500
075691af
AI
1501#ifdef __OPTIMIZE__
1502extern __inline __m128d
1503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1504_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1505{
1506 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1507 (__v2df) __B,
1508 __R);
1509}
1510
1853f5c7
SP
1511extern __inline __m128d
1512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1513_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1514 __m128d __B, const int __R)
1515{
1516 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1517 (__v2df) __B,
1518 (__v2df) __W,
1519 (__mmask8) __U, __R);
1520}
1521
1522extern __inline __m128d
1523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1524_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1525 const int __R)
1526{
1527 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1528 (__v2df) __B,
1529 (__v2df)
1530 _mm_setzero_pd (),
1531 (__mmask8) __U, __R);
1532}
1533
075691af
AI
1534extern __inline __m128
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1537{
1538 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1539 (__v4sf) __B,
1540 __R);
1541}
1542
1853f5c7
SP
1543extern __inline __m128
1544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1546 __m128 __B, const int __R)
1547{
1548 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1549 (__v4sf) __B,
1550 (__v4sf) __W,
1551 (__mmask8) __U, __R);
1552}
1553
1554extern __inline __m128
1555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1557 const int __R)
1558{
1559 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1560 (__v4sf) __B,
1561 (__v4sf)
1562 _mm_setzero_ps (),
1563 (__mmask8) __U, __R);
1564}
1565
075691af
AI
1566extern __inline __m128d
1567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1568_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1569{
1570 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1571 (__v2df) __B,
1572 __R);
1573}
1574
1853f5c7
SP
1575extern __inline __m128d
1576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1577_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1578 __m128d __B, const int __R)
1579{
1580 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1581 (__v2df) __B,
1582 (__v2df) __W,
1583 (__mmask8) __U, __R);
1584}
1585
1586extern __inline __m128d
1587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1589 const int __R)
1590{
1591 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1592 (__v2df) __B,
1593 (__v2df)
1594 _mm_setzero_pd (),
1595 (__mmask8) __U, __R);
1596}
1597
075691af
AI
1598extern __inline __m128
1599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1601{
1602 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1603 (__v4sf) __B,
1604 __R);
1605}
1606
1853f5c7
SP
1607extern __inline __m128
1608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1610 __m128 __B, const int __R)
1611{
1612 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1613 (__v4sf) __B,
1614 (__v4sf) __W,
1615 (__mmask8) __U, __R);
1616}
1617
1618extern __inline __m128
1619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1620_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1621 const int __R)
1622{
1623 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1624 (__v4sf) __B,
1625 (__v4sf)
1626 _mm_setzero_ps (),
1627 (__mmask8) __U, __R);
1628}
1629
075691af
AI
1630#else
/* Macro forms of the scalar add/sub rounding wrappers, used when
   __OPTIMIZE__ is not defined.  Arguments are forwarded to the builtins
   in the same order as the inline-function forms use; the maskz variants
   supply a zero vector as the third operand.

   Every expansion is enclosed in parentheses so that the result cast
   applies to the whole builtin call even when the macro invocation is
   followed by a postfix operator such as a subscript.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round (A, B, C))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round (A, B, \
                                              (__v2df) _mm_setzero_pd (), \
                                              U, C))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round (A, B, C))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round (A, B, \
                                             (__v4sf) _mm_setzero_ps (), \
                                             U, C))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round (A, B, C))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round (A, B, \
                                              (__v2df) _mm_setzero_pd (), \
                                              U, C))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round (A, B, C))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round (A, B, \
                                             (__v4sf) _mm_setzero_ps (), \
                                             U, C))
1666
075691af
AI
1667#endif
1668
50d9ca71
HW
/* Named constants standing for the three ternary-logic inputs A, B and
   C.  Reading a bit index i as the 3-bit number (a b c), bit i of
   _MM_TERNLOG_A is a, of _MM_TERNLOG_B is b and of _MM_TERNLOG_C is c.
   NOTE(review): presumably meant to be combined with bitwise operators to
   build the immediate of the ternarylogic intrinsics -- confirm.  */
typedef enum
{
  _MM_TERNLOG_A = 0xF0,	/* 1111 0000 */
  _MM_TERNLOG_B = 0xCC,	/* 1100 1100 */
  _MM_TERNLOG_C = 0xAA	/* 1010 1010 */
} _MM_TERNLOG_ENUM;
1677
756c5857
AI
1678#ifdef __OPTIMIZE__
1679extern __inline __m512i
1680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1681_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1682 const int __imm)
756c5857 1683{
50d9ca71
HW
1684 return (__m512i)
1685 __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1686 (__v8di) __B,
1687 (__v8di) __C,
1688 (unsigned char) __imm,
1689 (__mmask8) -1);
756c5857
AI
1690}
1691
1692extern __inline __m512i
1693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
b5fd0b71 1695 __m512i __C, const int __imm)
756c5857 1696{
50d9ca71
HW
1697 return (__m512i)
1698 __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1699 (__v8di) __B,
1700 (__v8di) __C,
1701 (unsigned char) __imm,
1702 (__mmask8) __U);
756c5857
AI
1703}
1704
1705extern __inline __m512i
1706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
b5fd0b71 1708 __m512i __C, const int __imm)
756c5857 1709{
50d9ca71
HW
1710 return (__m512i)
1711 __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1712 (__v8di) __B,
1713 (__v8di) __C,
1714 (unsigned char) __imm,
1715 (__mmask8) __U);
756c5857
AI
1716}
1717
1718extern __inline __m512i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1720_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1721 const int __imm)
756c5857 1722{
50d9ca71
HW
1723 return (__m512i)
1724 __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1725 (__v16si) __B,
1726 (__v16si) __C,
1727 (unsigned char) __imm,
1728 (__mmask16) -1);
756c5857
AI
1729}
1730
1731extern __inline __m512i
1732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
b5fd0b71 1734 __m512i __C, const int __imm)
756c5857 1735{
50d9ca71
HW
1736 return (__m512i)
1737 __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1738 (__v16si) __B,
1739 (__v16si) __C,
1740 (unsigned char) __imm,
1741 (__mmask16) __U);
756c5857
AI
1742}
1743
1744extern __inline __m512i
1745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1746_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
b5fd0b71 1747 __m512i __C, const int __imm)
756c5857 1748{
50d9ca71
HW
1749 return (__m512i)
1750 __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1751 (__v16si) __B,
1752 (__v16si) __C,
1753 (unsigned char) __imm,
1754 (__mmask16) __U);
756c5857
AI
1755}
1756#else
50d9ca71
HW
/* Macro forms of the six ternary-logic wrappers, used when __OPTIMIZE__
   is not defined.  The maskz variants expand to the separate *_maskz
   builtins; all others expand to the *_mask builtins.
   NOTE(review): presumably macros are used here so that I reaches the
   builtin as a literal constant without relying on inlining -- confirm.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask8) -1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask8) (U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask8) (U)))

#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask16) -1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask16) (U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (unsigned char) (I), \
     (__mmask16) (U)))
756c5857
AI
1799#endif
1800
1801extern __inline __m512d
1802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1803_mm512_rcp14_pd (__m512d __A)
1804{
1805 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1806 (__v8df)
0b192937 1807 _mm512_undefined_pd (),
756c5857
AI
1808 (__mmask8) -1);
1809}
1810
1811extern __inline __m512d
1812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1813_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1814{
1815 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1816 (__v8df) __W,
1817 (__mmask8) __U);
1818}
1819
1820extern __inline __m512d
1821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1823{
1824 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1825 (__v8df)
1826 _mm512_setzero_pd (),
1827 (__mmask8) __U);
1828}
1829
1830extern __inline __m512
1831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832_mm512_rcp14_ps (__m512 __A)
1833{
1834 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1835 (__v16sf)
0b192937 1836 _mm512_undefined_ps (),
756c5857
AI
1837 (__mmask16) -1);
1838}
1839
1840extern __inline __m512
1841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1843{
1844 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1845 (__v16sf) __W,
1846 (__mmask16) __U);
1847}
1848
1849extern __inline __m512
1850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1852{
1853 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1854 (__v16sf)
1855 _mm512_setzero_ps (),
1856 (__mmask16) __U);
1857}
1858
075691af
AI
1859extern __inline __m128d
1860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861_mm_rcp14_sd (__m128d __A, __m128d __B)
1862{
df62b4af
IT
1863 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1864 (__v2df) __A);
075691af
AI
1865}
1866
f4ee3a9e
UB
1867extern __inline __m128d
1868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1869_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1870{
1871 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1872 (__v2df) __A,
1873 (__v2df) __W,
1874 (__mmask8) __U);
1875}
1876
1877extern __inline __m128d
1878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1879_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1880{
1881 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1882 (__v2df) __A,
1883 (__v2df) _mm_setzero_ps (),
1884 (__mmask8) __U);
1885}
1886
075691af
AI
1887extern __inline __m128
1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889_mm_rcp14_ss (__m128 __A, __m128 __B)
1890{
df62b4af
IT
1891 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1892 (__v4sf) __A);
075691af
AI
1893}
1894
f4ee3a9e
UB
1895extern __inline __m128
1896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1898{
1899 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1900 (__v4sf) __A,
1901 (__v4sf) __W,
1902 (__mmask8) __U);
1903}
1904
1905extern __inline __m128
1906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1908{
1909 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1910 (__v4sf) __A,
1911 (__v4sf) _mm_setzero_ps (),
1912 (__mmask8) __U);
1913}
1914
756c5857
AI
1915extern __inline __m512d
1916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917_mm512_rsqrt14_pd (__m512d __A)
1918{
1919 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1920 (__v8df)
0b192937 1921 _mm512_undefined_pd (),
756c5857
AI
1922 (__mmask8) -1);
1923}
1924
1925extern __inline __m512d
1926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1928{
1929 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1930 (__v8df) __W,
1931 (__mmask8) __U);
1932}
1933
1934extern __inline __m512d
1935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1936_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1937{
1938 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1939 (__v8df)
1940 _mm512_setzero_pd (),
1941 (__mmask8) __U);
1942}
1943
1944extern __inline __m512
1945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946_mm512_rsqrt14_ps (__m512 __A)
1947{
1948 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1949 (__v16sf)
0b192937 1950 _mm512_undefined_ps (),
756c5857
AI
1951 (__mmask16) -1);
1952}
1953
1954extern __inline __m512
1955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1957{
1958 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1959 (__v16sf) __W,
1960 (__mmask16) __U);
1961}
1962
1963extern __inline __m512
1964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1965_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1966{
1967 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1968 (__v16sf)
1969 _mm512_setzero_ps (),
1970 (__mmask16) __U);
1971}
1972
075691af
AI
1973extern __inline __m128d
1974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1975_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1976{
df62b4af
IT
1977 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1978 (__v2df) __A);
075691af
AI
1979}
1980
d7a33a4c
JK
1981extern __inline __m128d
1982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1983_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1984{
1985 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1986 (__v2df) __A,
1987 (__v2df) __W,
1988 (__mmask8) __U);
1989}
1990
1991extern __inline __m128d
1992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1994{
1995 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1996 (__v2df) __A,
1997 (__v2df) _mm_setzero_pd (),
1998 (__mmask8) __U);
1999}
2000
075691af
AI
2001extern __inline __m128
2002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2003_mm_rsqrt14_ss (__m128 __A, __m128 __B)
2004{
df62b4af
IT
2005 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
2006 (__v4sf) __A);
075691af
AI
2007}
2008
d7a33a4c
JK
2009extern __inline __m128
2010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2012{
2013 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
2014 (__v4sf) __A,
2015 (__v4sf) __W,
2016 (__mmask8) __U);
2017}
2018
2019extern __inline __m128
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
2022{
2023 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
2024 (__v4sf) __A,
2025 (__v4sf) _mm_setzero_ps (),
2026 (__mmask8) __U);
2027}
2028
756c5857
AI
2029#ifdef __OPTIMIZE__
2030extern __inline __m512d
2031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2032_mm512_sqrt_round_pd (__m512d __A, const int __R)
2033{
2034 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2035 (__v8df)
0b192937 2036 _mm512_undefined_pd (),
756c5857
AI
2037 (__mmask8) -1, __R);
2038}
2039
2040extern __inline __m512d
2041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2042_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2043 const int __R)
2044{
2045 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2046 (__v8df) __W,
2047 (__mmask8) __U, __R);
2048}
2049
2050extern __inline __m512d
2051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2052_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
2053{
2054 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
2055 (__v8df)
2056 _mm512_setzero_pd (),
2057 (__mmask8) __U, __R);
2058}
2059
2060extern __inline __m512
2061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2062_mm512_sqrt_round_ps (__m512 __A, const int __R)
2063{
2064 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2065 (__v16sf)
0b192937 2066 _mm512_undefined_ps (),
756c5857
AI
2067 (__mmask16) -1, __R);
2068}
2069
2070extern __inline __m512
2071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2073{
2074 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2075 (__v16sf) __W,
2076 (__mmask16) __U, __R);
2077}
2078
2079extern __inline __m512
2080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2081_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2082{
2083 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2084 (__v16sf)
2085 _mm512_setzero_ps (),
2086 (__mmask16) __U, __R);
2087}
2088
075691af
AI
2089extern __inline __m128d
2090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2092{
b10bc0d6
OM
2093 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2094 (__v2df) __A,
2095 (__v2df)
2096 _mm_setzero_pd (),
2097 (__mmask8) -1, __R);
2098}
2099
2100extern __inline __m128d
2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2103 const int __R)
2104{
2105 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2106 (__v2df) __A,
2107 (__v2df) __W,
2108 (__mmask8) __U, __R);
2109}
2110
2111extern __inline __m128d
2112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2114{
2115 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2116 (__v2df) __A,
2117 (__v2df)
2118 _mm_setzero_pd (),
2119 (__mmask8) __U, __R);
075691af
AI
2120}
2121
2122extern __inline __m128
2123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2125{
b10bc0d6
OM
2126 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2127 (__v4sf) __A,
2128 (__v4sf)
2129 _mm_setzero_ps (),
2130 (__mmask8) -1, __R);
2131}
2132
2133extern __inline __m128
2134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2136 const int __R)
2137{
2138 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2139 (__v4sf) __A,
2140 (__v4sf) __W,
2141 (__mmask8) __U, __R);
2142}
2143
2144extern __inline __m128
2145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2147{
2148 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2149 (__v4sf) __A,
2150 (__v4sf)
2151 _mm_setzero_ps (),
2152 (__mmask8) __U, __R);
075691af 2153}
756c5857
AI
2154#else
/* Macro forms of the sqrt_round intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expansion is wrapped in parentheses so that an operator
   written after the macro invocation (for example a trailing subscript)
   applies to the cast result rather than to the bare builtin call, which
   matches how the inline-function forms behave.  Macro parameters are
   parenthesized where they are used.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))

#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((B), (A), \
      (__v2df) _mm_setzero_pd (), -1, (C)))

#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((B), (A), (W), (U), (C)))

#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((B), (A), \
      (__v2df) _mm_setzero_pd (), (U), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((B), (A), \
      (__v4sf) _mm_setzero_ps (), -1, (C)))

#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((B), (A), (W), (U), (C)))

#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((B), (A), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))
756c5857
AI
2194#endif
2195
93103603
SP
/* Scalar sqrt forms without an explicit rounding argument: each one
   forwards its arguments to the matching *_sqrt_round_* intrinsic and
   supplies _MM_FROUND_CUR_DIRECTION as the last argument.  */
#define _mm_mask_sqrt_sd(W, U, A, B)                    \
  _mm_mask_sqrt_round_sd ((W), (U), (A), (B),           \
                          _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_sqrt_sd(U, A, B)                      \
  _mm_maskz_sqrt_round_sd ((U), (A), (B),               \
                           _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_sqrt_ss(W, U, A, B)                    \
  _mm_mask_sqrt_round_ss ((W), (U), (A), (B),           \
                          _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_sqrt_ss(U, A, B)                      \
  _mm_maskz_sqrt_round_ss ((U), (A), (B),               \
                           _MM_FROUND_CUR_DIRECTION)
2207
756c5857
AI
2208extern __inline __m512i
2209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2210_mm512_cvtepi8_epi32 (__m128i __A)
2211{
2212 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2213 (__v16si)
4271e5cb 2214 _mm512_undefined_epi32 (),
756c5857
AI
2215 (__mmask16) -1);
2216}
2217
2218extern __inline __m512i
2219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2220_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2221{
2222 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2223 (__v16si) __W,
2224 (__mmask16) __U);
2225}
2226
2227extern __inline __m512i
2228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2229_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2230{
2231 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2232 (__v16si)
2233 _mm512_setzero_si512 (),
2234 (__mmask16) __U);
2235}
2236
2237extern __inline __m512i
2238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2239_mm512_cvtepi8_epi64 (__m128i __A)
2240{
2241 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2242 (__v8di)
4271e5cb 2243 _mm512_undefined_epi32 (),
756c5857
AI
2244 (__mmask8) -1);
2245}
2246
2247extern __inline __m512i
2248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2250{
2251 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2252 (__v8di) __W,
2253 (__mmask8) __U);
2254}
2255
2256extern __inline __m512i
2257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2258_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2259{
2260 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2261 (__v8di)
2262 _mm512_setzero_si512 (),
2263 (__mmask8) __U);
2264}
2265
2266extern __inline __m512i
2267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2268_mm512_cvtepi16_epi32 (__m256i __A)
2269{
2270 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2271 (__v16si)
4271e5cb 2272 _mm512_undefined_epi32 (),
756c5857
AI
2273 (__mmask16) -1);
2274}
2275
2276extern __inline __m512i
2277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2279{
2280 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2281 (__v16si) __W,
2282 (__mmask16) __U);
2283}
2284
2285extern __inline __m512i
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2288{
2289 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2290 (__v16si)
2291 _mm512_setzero_si512 (),
2292 (__mmask16) __U);
2293}
2294
2295extern __inline __m512i
2296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297_mm512_cvtepi16_epi64 (__m128i __A)
2298{
2299 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2300 (__v8di)
4271e5cb 2301 _mm512_undefined_epi32 (),
756c5857
AI
2302 (__mmask8) -1);
2303}
2304
2305extern __inline __m512i
2306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2308{
2309 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2310 (__v8di) __W,
2311 (__mmask8) __U);
2312}
2313
2314extern __inline __m512i
2315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2316_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2317{
2318 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2319 (__v8di)
2320 _mm512_setzero_si512 (),
2321 (__mmask8) __U);
2322}
2323
2324extern __inline __m512i
2325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2326_mm512_cvtepi32_epi64 (__m256i __X)
2327{
2328 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2329 (__v8di)
4271e5cb 2330 _mm512_undefined_epi32 (),
756c5857
AI
2331 (__mmask8) -1);
2332}
2333
2334extern __inline __m512i
2335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2336_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2337{
2338 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2339 (__v8di) __W,
2340 (__mmask8) __U);
2341}
2342
2343extern __inline __m512i
2344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2346{
2347 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2348 (__v8di)
2349 _mm512_setzero_si512 (),
2350 (__mmask8) __U);
2351}
2352
2353extern __inline __m512i
2354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355_mm512_cvtepu8_epi32 (__m128i __A)
2356{
2357 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2358 (__v16si)
4271e5cb 2359 _mm512_undefined_epi32 (),
756c5857
AI
2360 (__mmask16) -1);
2361}
2362
2363extern __inline __m512i
2364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2366{
2367 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2368 (__v16si) __W,
2369 (__mmask16) __U);
2370}
2371
2372extern __inline __m512i
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2375{
2376 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2377 (__v16si)
2378 _mm512_setzero_si512 (),
2379 (__mmask16) __U);
2380}
2381
2382extern __inline __m512i
2383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2384_mm512_cvtepu8_epi64 (__m128i __A)
2385{
2386 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2387 (__v8di)
4271e5cb 2388 _mm512_undefined_epi32 (),
756c5857
AI
2389 (__mmask8) -1);
2390}
2391
2392extern __inline __m512i
2393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2394_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2395{
2396 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2397 (__v8di) __W,
2398 (__mmask8) __U);
2399}
2400
2401extern __inline __m512i
2402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2403_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2404{
2405 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2406 (__v8di)
2407 _mm512_setzero_si512 (),
2408 (__mmask8) __U);
2409}
2410
2411extern __inline __m512i
2412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2413_mm512_cvtepu16_epi32 (__m256i __A)
2414{
2415 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2416 (__v16si)
4271e5cb 2417 _mm512_undefined_epi32 (),
756c5857
AI
2418 (__mmask16) -1);
2419}
2420
2421extern __inline __m512i
2422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2423_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2424{
2425 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2426 (__v16si) __W,
2427 (__mmask16) __U);
2428}
2429
2430extern __inline __m512i
2431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2432_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2433{
2434 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2435 (__v16si)
2436 _mm512_setzero_si512 (),
2437 (__mmask16) __U);
2438}
2439
2440extern __inline __m512i
2441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2442_mm512_cvtepu16_epi64 (__m128i __A)
2443{
2444 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2445 (__v8di)
4271e5cb 2446 _mm512_undefined_epi32 (),
756c5857
AI
2447 (__mmask8) -1);
2448}
2449
2450extern __inline __m512i
2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2453{
2454 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2455 (__v8di) __W,
2456 (__mmask8) __U);
2457}
2458
2459extern __inline __m512i
2460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2462{
2463 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2464 (__v8di)
2465 _mm512_setzero_si512 (),
2466 (__mmask8) __U);
2467}
2468
2469extern __inline __m512i
2470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2471_mm512_cvtepu32_epi64 (__m256i __X)
2472{
2473 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2474 (__v8di)
4271e5cb 2475 _mm512_undefined_epi32 (),
756c5857
AI
2476 (__mmask8) -1);
2477}
2478
2479extern __inline __m512i
2480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2482{
2483 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2484 (__v8di) __W,
2485 (__mmask8) __U);
2486}
2487
2488extern __inline __m512i
2489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2490_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2491{
2492 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2493 (__v8di)
2494 _mm512_setzero_si512 (),
2495 (__mmask8) __U);
2496}
2497
2498#ifdef __OPTIMIZE__
2499extern __inline __m512d
2500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2502{
2503 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2504 (__v8df) __B,
2505 (__v8df)
0b192937 2506 _mm512_undefined_pd (),
756c5857
AI
2507 (__mmask8) -1, __R);
2508}
2509
2510extern __inline __m512d
2511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2512_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2513 __m512d __B, const int __R)
2514{
2515 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2516 (__v8df) __B,
2517 (__v8df) __W,
2518 (__mmask8) __U, __R);
2519}
2520
2521extern __inline __m512d
2522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2524 const int __R)
2525{
2526 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2527 (__v8df) __B,
2528 (__v8df)
2529 _mm512_setzero_pd (),
2530 (__mmask8) __U, __R);
2531}
2532
2533extern __inline __m512
2534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2535_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2536{
2537 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2538 (__v16sf) __B,
2539 (__v16sf)
0b192937 2540 _mm512_undefined_ps (),
756c5857
AI
2541 (__mmask16) -1, __R);
2542}
2543
2544extern __inline __m512
2545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2546_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2547 __m512 __B, const int __R)
2548{
2549 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2550 (__v16sf) __B,
2551 (__v16sf) __W,
2552 (__mmask16) __U, __R);
2553}
2554
2555extern __inline __m512
2556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2557_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2558{
2559 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2560 (__v16sf) __B,
2561 (__v16sf)
2562 _mm512_setzero_ps (),
2563 (__mmask16) __U, __R);
2564}
2565
2566extern __inline __m512d
2567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2569{
2570 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2571 (__v8df) __B,
2572 (__v8df)
0b192937 2573 _mm512_undefined_pd (),
756c5857
AI
2574 (__mmask8) -1, __R);
2575}
2576
2577extern __inline __m512d
2578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2579_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2580 __m512d __B, const int __R)
2581{
2582 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2583 (__v8df) __B,
2584 (__v8df) __W,
2585 (__mmask8) __U, __R);
2586}
2587
2588extern __inline __m512d
2589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2590_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2591 const int __R)
2592{
2593 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2594 (__v8df) __B,
2595 (__v8df)
2596 _mm512_setzero_pd (),
2597 (__mmask8) __U, __R);
2598}
2599
2600extern __inline __m512
2601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2602_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2603{
2604 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2605 (__v16sf) __B,
2606 (__v16sf)
0b192937 2607 _mm512_undefined_ps (),
756c5857
AI
2608 (__mmask16) -1, __R);
2609}
2610
2611extern __inline __m512
2612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2614 __m512 __B, const int __R)
2615{
2616 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2617 (__v16sf) __B,
2618 (__v16sf) __W,
2619 (__mmask16) __U, __R);
2620}
2621
2622extern __inline __m512
2623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2624_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2625{
2626 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2627 (__v16sf) __B,
2628 (__v16sf)
2629 _mm512_setzero_ps (),
2630 (__mmask16) __U, __R);
2631}
2632#else
/* Macro forms of the 512-bit add_round and sub_round intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses so
   that an operator written after the macro invocation (for example a
   trailing subscript) applies to the cast result rather than to the bare
   builtin call, which matches how the inline-function forms behave.  Macro
   parameters are parenthesized where they are used.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))

#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))
2668#endif
2669
2670#ifdef __OPTIMIZE__
2671extern __inline __m512d
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2674{
2675 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2676 (__v8df) __B,
2677 (__v8df)
0b192937 2678 _mm512_undefined_pd (),
756c5857
AI
2679 (__mmask8) -1, __R);
2680}
2681
2682extern __inline __m512d
2683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2684_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2685 __m512d __B, const int __R)
2686{
2687 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2688 (__v8df) __B,
2689 (__v8df) __W,
2690 (__mmask8) __U, __R);
2691}
2692
2693extern __inline __m512d
2694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2695_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2696 const int __R)
2697{
2698 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2699 (__v8df) __B,
2700 (__v8df)
2701 _mm512_setzero_pd (),
2702 (__mmask8) __U, __R);
2703}
2704
2705extern __inline __m512
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2708{
2709 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2710 (__v16sf) __B,
2711 (__v16sf)
0b192937 2712 _mm512_undefined_ps (),
756c5857
AI
2713 (__mmask16) -1, __R);
2714}
2715
2716extern __inline __m512
2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2719 __m512 __B, const int __R)
2720{
2721 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2722 (__v16sf) __B,
2723 (__v16sf) __W,
2724 (__mmask16) __U, __R);
2725}
2726
2727extern __inline __m512
2728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2729_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2730{
2731 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2732 (__v16sf) __B,
2733 (__v16sf)
2734 _mm512_setzero_ps (),
2735 (__mmask16) __U, __R);
2736}
2737
2738extern __inline __m512d
2739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2740_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2741{
2742 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2743 (__v8df) __V,
2744 (__v8df)
0b192937 2745 _mm512_undefined_pd (),
756c5857
AI
2746 (__mmask8) -1, __R);
2747}
2748
2749extern __inline __m512d
2750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2752 __m512d __V, const int __R)
2753{
2754 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2755 (__v8df) __V,
2756 (__v8df) __W,
2757 (__mmask8) __U, __R);
2758}
2759
2760extern __inline __m512d
2761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2762_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2763 const int __R)
2764{
2765 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2766 (__v8df) __V,
2767 (__v8df)
2768 _mm512_setzero_pd (),
2769 (__mmask8) __U, __R);
2770}
2771
2772extern __inline __m512
2773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2774_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2775{
2776 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2777 (__v16sf) __B,
2778 (__v16sf)
0b192937 2779 _mm512_undefined_ps (),
756c5857
AI
2780 (__mmask16) -1, __R);
2781}
2782
2783extern __inline __m512
2784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2785_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2786 __m512 __B, const int __R)
2787{
2788 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2789 (__v16sf) __B,
2790 (__v16sf) __W,
2791 (__mmask16) __U, __R);
2792}
2793
2794extern __inline __m512
2795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2797{
2798 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2799 (__v16sf) __B,
2800 (__v16sf)
2801 _mm512_setzero_ps (),
2802 (__mmask16) __U, __R);
2803}
2804
075691af
AI
2805extern __inline __m128d
2806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2808{
2809 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2810 (__v2df) __B,
2811 __R);
2812}
2813
f4ee3a9e
UB
2814extern __inline __m128d
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2817 __m128d __B, const int __R)
2818{
2819 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2820 (__v2df) __B,
2821 (__v2df) __W,
2822 (__mmask8) __U, __R);
2823}
2824
2825extern __inline __m128d
2826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2827_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2828 const int __R)
2829{
2830 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2831 (__v2df) __B,
2832 (__v2df)
2833 _mm_setzero_pd (),
2834 (__mmask8) __U, __R);
2835}
2836
075691af
AI
2837extern __inline __m128
2838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2839_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2840{
2841 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2842 (__v4sf) __B,
2843 __R);
2844}
2845
f4ee3a9e
UB
2846extern __inline __m128
2847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2848_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2849 __m128 __B, const int __R)
2850{
2851 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2852 (__v4sf) __B,
2853 (__v4sf) __W,
2854 (__mmask8) __U, __R);
2855}
2856
2857extern __inline __m128
2858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2859_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2860 const int __R)
2861{
2862 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2863 (__v4sf) __B,
2864 (__v4sf)
2865 _mm_setzero_ps (),
2866 (__mmask8) __U, __R);
2867}
2868
075691af
AI
2869extern __inline __m128d
2870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2871_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2872{
2873 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2874 (__v2df) __B,
2875 __R);
2876}
2877
f4ee3a9e
UB
2878extern __inline __m128d
2879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2880_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2881 __m128d __B, const int __R)
2882{
2883 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2884 (__v2df) __B,
2885 (__v2df) __W,
2886 (__mmask8) __U, __R);
2887}
2888
2889extern __inline __m128d
2890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2892 const int __R)
2893{
2894 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2895 (__v2df) __B,
2896 (__v2df)
2897 _mm_setzero_pd (),
2898 (__mmask8) __U, __R);
2899}
2900
075691af
AI
2901extern __inline __m128
2902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2903_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2904{
2905 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2906 (__v4sf) __B,
2907 __R);
2908}
2909
f4ee3a9e
UB
2910extern __inline __m128
2911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2912_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2913 __m128 __B, const int __R)
2914{
2915 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2916 (__v4sf) __B,
2917 (__v4sf) __W,
2918 (__mmask8) __U, __R);
2919}
2920
2921extern __inline __m128
2922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2924 const int __R)
2925{
2926 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2927 (__v4sf) __B,
2928 (__v4sf)
2929 _mm_setzero_ps (),
2930 (__mmask8) __U, __R);
2931}
2932
756c5857
AI
2933#else
/* Macro forms of the 512-bit packed multiply-with-rounding intrinsics.
   Each expansion and each macro argument is parenthesized so that a use
   such as _mm512_mul_round_pd (a, b, r)[0] applies the postfix operator
   to the cast result rather than to the raw builtin call.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) \
   __builtin_ia32_mulpd512_mask ((A), (B), \
				 (__v8df) _mm512_undefined_pd (), \
				 -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) \
   __builtin_ia32_mulpd512_mask ((A), (B), \
				 (__v8df) _mm512_setzero_pd (), \
				 (U), (C)))

#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) \
   __builtin_ia32_mulps512_mask ((A), (B), \
				 (__v16sf) _mm512_undefined_ps (), \
				 -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) \
   __builtin_ia32_mulps512_mask ((A), (B), \
				 (__v16sf) _mm512_setzero_ps (), \
				 (U), (C)))
2951
/* Macro forms of the 512-bit packed divide-with-rounding intrinsics.
   Each expansion and each macro argument is parenthesized so that a
   postfix operator applied to the macro result binds to the cast result
   rather than to the raw builtin call.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) \
   __builtin_ia32_divpd512_mask ((A), (B), \
				 (__v8df) _mm512_undefined_pd (), \
				 -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) \
   __builtin_ia32_divpd512_mask ((A), (B), \
				 (__v8df) _mm512_setzero_pd (), \
				 (U), (C)))

#define _mm512_div_round_ps(A, B, C) \
  ((__m512) \
   __builtin_ia32_divps512_mask ((A), (B), \
				 (__v16sf) _mm512_undefined_ps (), \
				 -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) \
   __builtin_ia32_divps512_mask ((A), (B), \
				 (__v16sf) _mm512_setzero_ps (), \
				 (U), (C)))
075691af
AI
2969
/* Macro forms of the scalar multiply-with-rounding intrinsics.  Each
   expansion and each macro argument is parenthesized so that a postfix
   operator applied to the macro result binds to the cast result rather
   than to the raw builtin call.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_mulsd_mask_round ((A), (B), \
				    (__v2df) _mm_setzero_pd (), \
				    (U), (C)))

#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_mulss_mask_round ((A), (B), \
				    (__v4sf) _mm_setzero_ps (), \
				    (U), (C)))
2987
075691af
AI
/* Macro forms of the scalar divide-with-rounding intrinsics.  Each
   expansion and each macro argument is parenthesized so that a postfix
   operator applied to the macro result binds to the cast result rather
   than to the raw builtin call.  */
#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))

#define _mm_mask_div_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_divsd_mask_round ((A), (B), \
				    (__v2df) _mm_setzero_pd (), \
				    (U), (C)))

#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))

#define _mm_mask_div_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_divss_mask_round ((A), (B), \
				    (__v4sf) _mm_setzero_ps (), \
				    (U), (C)))
3005
756c5857
AI
3006#endif
3007
3008#ifdef __OPTIMIZE__
3009extern __inline __m512d
3010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3011_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
3012{
3013 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
3014 (__v8df) __B,
3015 (__v8df)
0b192937 3016 _mm512_undefined_pd (),
756c5857
AI
3017 (__mmask8) -1, __R);
3018}
3019
3020extern __inline __m512d
3021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3022_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3023 __m512d __B, const int __R)
3024{
3025 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
3026 (__v8df) __B,
3027 (__v8df) __W,
3028 (__mmask8) __U, __R);
3029}
3030
3031extern __inline __m512d
3032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3034 const int __R)
3035{
3036 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
3037 (__v8df) __B,
3038 (__v8df)
3039 _mm512_setzero_pd (),
3040 (__mmask8) __U, __R);
3041}
3042
3043extern __inline __m512
3044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3045_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
3046{
3047 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3048 (__v16sf) __B,
3049 (__v16sf)
0b192937 3050 _mm512_undefined_ps (),
756c5857
AI
3051 (__mmask16) -1, __R);
3052}
3053
3054extern __inline __m512
3055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3057 __m512 __B, const int __R)
3058{
3059 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3060 (__v16sf) __B,
3061 (__v16sf) __W,
3062 (__mmask16) __U, __R);
3063}
3064
3065extern __inline __m512
3066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3068{
3069 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
3070 (__v16sf) __B,
3071 (__v16sf)
3072 _mm512_setzero_ps (),
3073 (__mmask16) __U, __R);
3074}
3075
3076extern __inline __m512d
3077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
3079{
3080 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3081 (__v8df) __B,
3082 (__v8df)
0b192937 3083 _mm512_undefined_pd (),
756c5857
AI
3084 (__mmask8) -1, __R);
3085}
3086
3087extern __inline __m512d
3088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3090 __m512d __B, const int __R)
3091{
3092 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3093 (__v8df) __B,
3094 (__v8df) __W,
3095 (__mmask8) __U, __R);
3096}
3097
3098extern __inline __m512d
3099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3100_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3101 const int __R)
3102{
3103 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3104 (__v8df) __B,
3105 (__v8df)
3106 _mm512_setzero_pd (),
3107 (__mmask8) __U, __R);
3108}
3109
3110extern __inline __m512
3111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3112_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
3113{
3114 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3115 (__v16sf) __B,
3116 (__v16sf)
0b192937 3117 _mm512_undefined_ps (),
756c5857
AI
3118 (__mmask16) -1, __R);
3119}
3120
3121extern __inline __m512
3122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3124 __m512 __B, const int __R)
3125{
3126 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3127 (__v16sf) __B,
3128 (__v16sf) __W,
3129 (__mmask16) __U, __R);
3130}
3131
3132extern __inline __m512
3133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3135{
3136 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3137 (__v16sf) __B,
3138 (__v16sf)
3139 _mm512_setzero_ps (),
3140 (__mmask16) __U, __R);
3141}
3142#else
/* Macro forms of the 512-bit packed maximum-with-rounding intrinsics.
   _mm512_max_round_ps takes its placeholder operand from
   _mm512_undefined_ps (), matching the element type it is cast to.
   Each expansion and each macro argument is parenthesized so that a
   postfix operator applied to the macro result binds to the cast result
   rather than to the raw builtin call.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) \
   __builtin_ia32_maxpd512_mask ((A), (B), \
				 (__v8df) _mm512_undefined_pd (), \
				 -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) \
   __builtin_ia32_maxpd512_mask ((A), (B), \
				 (__v8df) _mm512_setzero_pd (), \
				 (U), (R)))

#define _mm512_max_round_ps(A, B, R) \
  ((__m512) \
   __builtin_ia32_maxps512_mask ((A), (B), \
				 (__v16sf) _mm512_undefined_ps (), \
				 -1, (R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) \
   __builtin_ia32_maxps512_mask ((A), (B), \
				 (__v16sf) _mm512_setzero_ps (), \
				 (U), (R)))
3160
/* Macro forms of the 512-bit packed minimum-with-rounding intrinsics.
   Each expansion and each macro argument is parenthesized so that a
   postfix operator applied to the macro result binds to the cast result
   rather than to the raw builtin call.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) \
   __builtin_ia32_minpd512_mask ((A), (B), \
				 (__v8df) _mm512_undefined_pd (), \
				 -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) \
   __builtin_ia32_minpd512_mask ((A), (B), \
				 (__v8df) _mm512_setzero_pd (), \
				 (U), (R)))

#define _mm512_min_round_ps(A, B, R) \
  ((__m512) \
   __builtin_ia32_minps512_mask ((A), (B), \
				 (__v16sf) _mm512_undefined_ps (), \
				 -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) \
   __builtin_ia32_minps512_mask ((A), (B), \
				 (__v16sf) _mm512_setzero_ps (), \
				 (U), (R)))
3178#endif
3179
3180#ifdef __OPTIMIZE__
3181extern __inline __m512d
3182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3183_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3184{
3185 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3186 (__v8df) __B,
3187 (__v8df)
0b192937 3188 _mm512_undefined_pd (),
756c5857
AI
3189 (__mmask8) -1, __R);
3190}
3191
3192extern __inline __m512d
3193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3195 __m512d __B, const int __R)
3196{
3197 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3198 (__v8df) __B,
3199 (__v8df) __W,
3200 (__mmask8) __U, __R);
3201}
3202
3203extern __inline __m512d
3204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3205_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3206 const int __R)
3207{
3208 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3209 (__v8df) __B,
3210 (__v8df)
3211 _mm512_setzero_pd (),
3212 (__mmask8) __U, __R);
3213}
3214
3215extern __inline __m512
3216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3218{
3219 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3220 (__v16sf) __B,
3221 (__v16sf)
0b192937 3222 _mm512_undefined_ps (),
756c5857
AI
3223 (__mmask16) -1, __R);
3224}
3225
3226extern __inline __m512
3227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3229 __m512 __B, const int __R)
3230{
3231 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3232 (__v16sf) __B,
3233 (__v16sf) __W,
3234 (__mmask16) __U, __R);
3235}
3236
3237extern __inline __m512
3238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3240 const int __R)
3241{
3242 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3243 (__v16sf) __B,
3244 (__v16sf)
3245 _mm512_setzero_ps (),
3246 (__mmask16) __U, __R);
3247}
3248
075691af
AI
3249extern __inline __m128d
3250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3251_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3252{
158061a6
OM
3253 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3254 (__v2df) __B,
3255 (__v2df)
3256 _mm_setzero_pd (),
3257 (__mmask8) -1, __R);
3258}
3259
3260extern __inline __m128d
3261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3262_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3263 const int __R)
3264{
3265 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3266 (__v2df) __B,
3267 (__v2df) __W,
3268 (__mmask8) __U, __R);
3269}
3270
3271extern __inline __m128d
3272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3274 const int __R)
3275{
3276 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3277 (__v2df) __B,
3278 (__v2df)
3279 _mm_setzero_pd (),
3280 (__mmask8) __U, __R);
075691af
AI
3281}
3282
3283extern __inline __m128
3284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3286{
158061a6
OM
3287 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3288 (__v4sf) __B,
3289 (__v4sf)
3290 _mm_setzero_ps (),
3291 (__mmask8) -1, __R);
3292}
3293
3294extern __inline __m128
3295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3296_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3297 const int __R)
3298{
3299 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3300 (__v4sf) __B,
3301 (__v4sf) __W,
3302 (__mmask8) __U, __R);
3303}
3304
3305extern __inline __m128
3306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3307_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3308{
3309 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3310 (__v4sf) __B,
3311 (__v4sf)
3312 _mm_setzero_ps (),
3313 (__mmask8) __U, __R);
075691af 3314}
756c5857 3315#else
3c940d42
HW
/* Macro forms of the 512-bit packed scalef-with-rounding intrinsics.
   Each one expands to a single call of the corresponding
   __builtin_ia32_scalef{pd,ps}512_mask builtin.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) \
   __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) \
   __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))
3345
/* Macro forms of the scalar scalef-with-rounding intrinsics.  Each one
   expands to a single call of __builtin_ia32_scalef{sd,ss}_mask_round.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
      (__v2df) _mm_undefined_pd (), -1, (C)))

#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
      (__v4sf) _mm_undefined_ps (), -1, (C)))

#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))

#define _mm_maskz_scalef_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))
756c5857
AI
3377#endif
3378
93103603
SP
/* Masked scalar scalef without an explicit rounding argument: each macro
   forwards to its *_round_* counterpart with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_scalef_sd(W, U, A, B) \
  _mm_mask_scalef_round_sd ((W), (U), (A), (B), \
			    _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_scalef_sd(U, A, B) \
  _mm_maskz_scalef_round_sd ((U), (A), (B), \
			     _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_scalef_ss(W, U, A, B) \
  _mm_mask_scalef_round_ss ((W), (U), (A), (B), \
			    _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_scalef_ss(U, A, B) \
  _mm_maskz_scalef_round_ss ((U), (A), (B), \
			     _MM_FROUND_CUR_DIRECTION)
3390
756c5857
AI
3391#ifdef __OPTIMIZE__
3392extern __inline __m512d
3393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3394_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3395{
3396 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3397 (__v8df) __B,
3398 (__v8df) __C,
3399 (__mmask8) -1, __R);
3400}
3401
3402extern __inline __m512d
3403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3404_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3405 __m512d __C, const int __R)
3406{
3407 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3408 (__v8df) __B,
3409 (__v8df) __C,
3410 (__mmask8) __U, __R);
3411}
3412
3413extern __inline __m512d
3414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3416 __mmask8 __U, const int __R)
3417{
3418 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3419 (__v8df) __B,
3420 (__v8df) __C,
3421 (__mmask8) __U, __R);
3422}
3423
3424extern __inline __m512d
3425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3426_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3427 __m512d __C, const int __R)
3428{
3429 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3430 (__v8df) __B,
3431 (__v8df) __C,
3432 (__mmask8) __U, __R);
3433}
3434
3435extern __inline __m512
3436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3438{
3439 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3440 (__v16sf) __B,
3441 (__v16sf) __C,
3442 (__mmask16) -1, __R);
3443}
3444
3445extern __inline __m512
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3448 __m512 __C, const int __R)
3449{
3450 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3451 (__v16sf) __B,
3452 (__v16sf) __C,
3453 (__mmask16) __U, __R);
3454}
3455
3456extern __inline __m512
3457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3458_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3459 __mmask16 __U, const int __R)
3460{
3461 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3462 (__v16sf) __B,
3463 (__v16sf) __C,
3464 (__mmask16) __U, __R);
3465}
3466
3467extern __inline __m512
3468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3469_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3470 __m512 __C, const int __R)
3471{
3472 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3473 (__v16sf) __B,
3474 (__v16sf) __C,
3475 (__mmask16) __U, __R);
3476}
3477
3478extern __inline __m512d
3479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3480_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3481{
fe7f972d 3482 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3483 (__v8df) __B,
fe7f972d 3484 (__v8df) __C,
756c5857
AI
3485 (__mmask8) -1, __R);
3486}
3487
3488extern __inline __m512d
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3491 __m512d __C, const int __R)
3492{
fe7f972d 3493 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3494 (__v8df) __B,
fe7f972d 3495 (__v8df) __C,
756c5857
AI
3496 (__mmask8) __U, __R);
3497}
3498
3499extern __inline __m512d
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3502 __mmask8 __U, const int __R)
3503{
3504 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3505 (__v8df) __B,
3506 (__v8df) __C,
3507 (__mmask8) __U, __R);
3508}
3509
3510extern __inline __m512d
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3513 __m512d __C, const int __R)
3514{
fe7f972d 3515 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 3516 (__v8df) __B,
fe7f972d 3517 (__v8df) __C,
756c5857
AI
3518 (__mmask8) __U, __R);
3519}
3520
3521extern __inline __m512
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3524{
fe7f972d 3525 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3526 (__v16sf) __B,
fe7f972d 3527 (__v16sf) __C,
756c5857
AI
3528 (__mmask16) -1, __R);
3529}
3530
3531extern __inline __m512
3532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3533_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3534 __m512 __C, const int __R)
3535{
fe7f972d 3536 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3537 (__v16sf) __B,
fe7f972d 3538 (__v16sf) __C,
756c5857
AI
3539 (__mmask16) __U, __R);
3540}
3541
3542extern __inline __m512
3543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3544_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3545 __mmask16 __U, const int __R)
3546{
3547 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3548 (__v16sf) __B,
3549 (__v16sf) __C,
3550 (__mmask16) __U, __R);
3551}
3552
3553extern __inline __m512
3554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3555_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3556 __m512 __C, const int __R)
3557{
fe7f972d 3558 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 3559 (__v16sf) __B,
fe7f972d 3560 (__v16sf) __C,
756c5857
AI
3561 (__mmask16) __U, __R);
3562}
3563
3564extern __inline __m512d
3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3567{
3568 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3569 (__v8df) __B,
3570 (__v8df) __C,
3571 (__mmask8) -1, __R);
3572}
3573
3574extern __inline __m512d
3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3577 __m512d __C, const int __R)
3578{
3579 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3580 (__v8df) __B,
3581 (__v8df) __C,
3582 (__mmask8) __U, __R);
3583}
3584
3585extern __inline __m512d
3586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3587_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3588 __mmask8 __U, const int __R)
3589{
3590 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3591 (__v8df) __B,
3592 (__v8df) __C,
3593 (__mmask8) __U, __R);
3594}
3595
3596extern __inline __m512d
3597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3599 __m512d __C, const int __R)
3600{
3601 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3602 (__v8df) __B,
3603 (__v8df) __C,
3604 (__mmask8) __U, __R);
3605}
3606
3607extern __inline __m512
3608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3609_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3610{
3611 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3612 (__v16sf) __B,
3613 (__v16sf) __C,
3614 (__mmask16) -1, __R);
3615}
3616
3617extern __inline __m512
3618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3619_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3620 __m512 __C, const int __R)
3621{
3622 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3623 (__v16sf) __B,
3624 (__v16sf) __C,
3625 (__mmask16) __U, __R);
3626}
3627
3628extern __inline __m512
3629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3630_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3631 __mmask16 __U, const int __R)
3632{
3633 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3634 (__v16sf) __B,
3635 (__v16sf) __C,
3636 (__mmask16) __U, __R);
3637}
3638
3639extern __inline __m512
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3642 __m512 __C, const int __R)
3643{
3644 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3645 (__v16sf) __B,
3646 (__v16sf) __C,
3647 (__mmask16) __U, __R);
3648}
3649
3650extern __inline __m512d
3651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3653{
3654 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3655 (__v8df) __B,
3656 -(__v8df) __C,
3657 (__mmask8) -1, __R);
3658}
3659
3660extern __inline __m512d
3661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3663 __m512d __C, const int __R)
3664{
3665 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3666 (__v8df) __B,
3667 -(__v8df) __C,
3668 (__mmask8) __U, __R);
3669}
3670
3671extern __inline __m512d
3672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3673_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3674 __mmask8 __U, const int __R)
3675{
3676 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3677 (__v8df) __B,
3678 (__v8df) __C,
3679 (__mmask8) __U, __R);
3680}
3681
3682extern __inline __m512d
3683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3684_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3685 __m512d __C, const int __R)
3686{
3687 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3688 (__v8df) __B,
3689 -(__v8df) __C,
3690 (__mmask8) __U, __R);
3691}
3692
3693extern __inline __m512
3694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3695_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3696{
3697 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3698 (__v16sf) __B,
3699 -(__v16sf) __C,
3700 (__mmask16) -1, __R);
3701}
3702
3703extern __inline __m512
3704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3705_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3706 __m512 __C, const int __R)
3707{
3708 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3709 (__v16sf) __B,
3710 -(__v16sf) __C,
3711 (__mmask16) __U, __R);
3712}
3713
3714extern __inline __m512
3715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3716_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3717 __mmask16 __U, const int __R)
3718{
3719 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3720 (__v16sf) __B,
3721 (__v16sf) __C,
3722 (__mmask16) __U, __R);
3723}
3724
3725extern __inline __m512
3726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3727_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3728 __m512 __C, const int __R)
3729{
3730 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3731 (__v16sf) __B,
3732 -(__v16sf) __C,
3733 (__mmask16) __U, __R);
3734}
3735
3736extern __inline __m512d
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3739{
5ca94977
L
3740 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3741 (__v8df) __B,
3742 (__v8df) __C,
3743 (__mmask8) -1, __R);
756c5857
AI
3744}
3745
3746extern __inline __m512d
3747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3748_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3749 __m512d __C, const int __R)
3750{
3751 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3752 (__v8df) __B,
3753 (__v8df) __C,
3754 (__mmask8) __U, __R);
3755}
3756
3757extern __inline __m512d
3758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3759_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3760 __mmask8 __U, const int __R)
3761{
5ca94977
L
3762 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
3763 (__v8df) __B,
3764 (__v8df) __C,
3765 (__mmask8) __U, __R);
756c5857
AI
3766}
3767
3768extern __inline __m512d
3769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3770_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3771 __m512d __C, const int __R)
3772{
5ca94977
L
3773 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
3774 (__v8df) __B,
3775 (__v8df) __C,
3776 (__mmask8) __U, __R);
756c5857
AI
3777}
3778
3779extern __inline __m512
3780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3781_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3782{
5ca94977
L
3783 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3784 (__v16sf) __B,
3785 (__v16sf) __C,
3786 (__mmask16) -1, __R);
756c5857
AI
3787}
3788
3789extern __inline __m512
3790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3791_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3792 __m512 __C, const int __R)
3793{
3794 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3795 (__v16sf) __B,
3796 (__v16sf) __C,
3797 (__mmask16) __U, __R);
3798}
3799
3800extern __inline __m512
3801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3802_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3803 __mmask16 __U, const int __R)
3804{
5ca94977
L
3805 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
3806 (__v16sf) __B,
3807 (__v16sf) __C,
3808 (__mmask16) __U, __R);
756c5857
AI
3809}
3810
3811extern __inline __m512
3812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3814 __m512 __C, const int __R)
3815{
5ca94977
L
3816 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
3817 (__v16sf) __B,
3818 (__v16sf) __C,
3819 (__mmask16) __U, __R);
756c5857
AI
3820}
3821
3822extern __inline __m512d
3823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3824_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3825{
38ef6fb1
L
3826 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3827 (__v8df) __B,
3828 (__v8df) __C,
3829 (__mmask8) -1, __R);
756c5857
AI
3830}
3831
3832extern __inline __m512d
3833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3834_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3835 __m512d __C, const int __R)
3836{
3837 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3838 (__v8df) __B,
3839 (__v8df) __C,
3840 (__mmask8) __U, __R);
3841}
3842
3843extern __inline __m512d
3844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3845_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3846 __mmask8 __U, const int __R)
3847{
3848 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3849 (__v8df) __B,
3850 (__v8df) __C,
3851 (__mmask8) __U, __R);
3852}
3853
3854extern __inline __m512d
3855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3856_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3857 __m512d __C, const int __R)
3858{
38ef6fb1
L
3859 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
3860 (__v8df) __B,
3861 (__v8df) __C,
3862 (__mmask8) __U, __R);
756c5857
AI
3863}
3864
3865extern __inline __m512
3866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3867_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3868{
38ef6fb1
L
3869 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3870 (__v16sf) __B,
3871 (__v16sf) __C,
3872 (__mmask16) -1, __R);
756c5857
AI
3873}
3874
3875extern __inline __m512
3876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3877_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3878 __m512 __C, const int __R)
3879{
3880 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3881 (__v16sf) __B,
3882 (__v16sf) __C,
3883 (__mmask16) __U, __R);
3884}
3885
3886extern __inline __m512
3887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3888_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3889 __mmask16 __U, const int __R)
3890{
3891 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3892 (__v16sf) __B,
3893 (__v16sf) __C,
3894 (__mmask16) __U, __R);
3895}
3896
3897extern __inline __m512
3898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3899_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3900 __m512 __C, const int __R)
3901{
38ef6fb1
L
3902 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
3903 (__v16sf) __B,
3904 (__v16sf) __C,
3905 (__mmask16) __U, __R);
756c5857
AI
3906}
3907#else
/* Macro forms of the rounding-controlled FMADD intrinsics (pd = packed
   double, ps = packed float).  Each one forwards its arguments to the
   corresponding __builtin_ia32_vfmadd{pd,ps}512_* builtin; the unmasked
   forms pass -1 as the mask.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro result acts on the cast
   value rather than on the builtin call.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
3931
/* Macro forms of the rounding-controlled FMSUB intrinsics (pd = packed
   double, ps = packed float).  Each one forwards its arguments to the
   corresponding __builtin_ia32_vfmsub{pd,ps}512_* builtin; the unmasked
   forms pass -1 as the mask.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro result acts on the cast
   value rather than on the builtin call.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_maskz (A, B, C, U, R))
3955
/* Macro forms of the rounding-controlled FMADDSUB intrinsics (pd =
   packed double, ps = packed float).  Each one forwards its arguments
   to the corresponding __builtin_ia32_vfmaddsub{pd,ps}512_* builtin;
   the unmasked forms pass -1 as the mask.  The whole expansion is
   parenthesized so that a postfix operator applied to the macro result
   acts on the cast value rather than on the builtin call.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3979
/* Macro forms of the rounding-controlled FMSUBADD intrinsics (pd =
   packed double, ps = packed float).  The unmasked, mask and maskz
   forms reuse the vfmaddsub builtins with the third operand negated
   (-(C)); only the mask3 forms call a dedicated vfmsubadd*_mask3
   builtin with C unchanged.  The unmasked forms pass -1 as the mask.
   The whole expansion is parenthesized so that a postfix operator
   applied to the macro result acts on the cast value rather than on
   the builtin call.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
4003
/* Macro forms of the rounding-controlled FNMADD intrinsics (pd = packed
   double, ps = packed float).  Each one forwards its arguments to the
   corresponding __builtin_ia32_vfnmadd{pd,ps}512_* builtin; the
   unmasked forms pass -1 as the mask.  The whole expansion is
   parenthesized so that a postfix operator applied to the macro result
   acts on the cast value rather than on the builtin call.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_maskz (A, B, C, U, R))
4027
/* Macro forms of the rounding-controlled FNMSUB intrinsics (pd = packed
   double, ps = packed float).  Each one forwards its arguments to the
   corresponding __builtin_ia32_vfnmsub{pd,ps}512_* builtin; the
   unmasked forms pass -1 as the mask.  The whole expansion is
   parenthesized so that a postfix operator applied to the macro result
   acts on the cast value rather than on the builtin call.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_maskz (A, B, C, U, R))
4051#endif
4052
4053extern __inline __m512i
4054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055_mm512_abs_epi64 (__m512i __A)
4056{
4057 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
4058 (__v8di)
4271e5cb 4059 _mm512_undefined_epi32 (),
756c5857
AI
4060 (__mmask8) -1);
4061}
4062
4063extern __inline __m512i
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4066{
4067 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
4068 (__v8di) __W,
4069 (__mmask8) __U);
4070}
4071
4072extern __inline __m512i
4073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4074_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
4075{
4076 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
4077 (__v8di)
4078 _mm512_setzero_si512 (),
4079 (__mmask8) __U);
4080}
4081
4082extern __inline __m512i
4083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4084_mm512_abs_epi32 (__m512i __A)
4085{
4086 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4087 (__v16si)
4271e5cb 4088 _mm512_undefined_epi32 (),
756c5857
AI
4089 (__mmask16) -1);
4090}
4091
4092extern __inline __m512i
4093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4094_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4095{
4096 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4097 (__v16si) __W,
4098 (__mmask16) __U);
4099}
4100
4101extern __inline __m512i
4102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4103_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
4104{
4105 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
4106 (__v16si)
4107 _mm512_setzero_si512 (),
4108 (__mmask16) __U);
4109}
4110
4111extern __inline __m512
4112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4113_mm512_broadcastss_ps (__m128 __A)
4114{
0b192937
UD
4115 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4116 (__v16sf)
4117 _mm512_undefined_ps (),
756c5857
AI
4118 (__mmask16) -1);
4119}
4120
4121extern __inline __m512
4122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4123_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
4124{
4125 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4126 (__v16sf) __O, __M);
4127}
4128
4129extern __inline __m512
4130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4131_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4132{
4133 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4134 (__v16sf)
4135 _mm512_setzero_ps (),
4136 __M);
4137}
4138
4139extern __inline __m512d
4140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4141_mm512_broadcastsd_pd (__m128d __A)
4142{
0b192937
UD
4143 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4144 (__v8df)
4145 _mm512_undefined_pd (),
756c5857
AI
4146 (__mmask8) -1);
4147}
4148
4149extern __inline __m512d
4150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
4152{
4153 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4154 (__v8df) __O, __M);
4155}
4156
4157extern __inline __m512d
4158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4159_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
4160{
4161 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4162 (__v8df)
4163 _mm512_setzero_pd (),
4164 __M);
4165}
4166
4167extern __inline __m512i
4168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4169_mm512_broadcastd_epi32 (__m128i __A)
4170{
0b192937
UD
4171 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4172 (__v16si)
4271e5cb 4173 _mm512_undefined_epi32 (),
756c5857
AI
4174 (__mmask16) -1);
4175}
4176
4177extern __inline __m512i
4178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4179_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
4180{
4181 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4182 (__v16si) __O, __M);
4183}
4184
4185extern __inline __m512i
4186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4187_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
4188{
4189 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4190 (__v16si)
4191 _mm512_setzero_si512 (),
4192 __M);
4193}
4194
4195extern __inline __m512i
4196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4197_mm512_set1_epi32 (int __A)
4198{
43373412 4199 return (__m512i)(__v16si)
4200 { __A, __A, __A, __A, __A, __A, __A, __A,
4201 __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4202}
4203
4204extern __inline __m512i
4205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4207{
4208 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4209 __M);
4210}
4211
4212extern __inline __m512i
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4215{
4216 return (__m512i)
4217 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4218 (__v16si) _mm512_setzero_si512 (),
4219 __M);
4220}
4221
4222extern __inline __m512i
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm512_broadcastq_epi64 (__m128i __A)
4225{
0b192937
UD
4226 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4227 (__v8di)
4271e5cb 4228 _mm512_undefined_epi32 (),
756c5857
AI
4229 (__mmask8) -1);
4230}
4231
4232extern __inline __m512i
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4235{
4236 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4237 (__v8di) __O, __M);
4238}
4239
4240extern __inline __m512i
4241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4243{
4244 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4245 (__v8di)
4246 _mm512_setzero_si512 (),
4247 __M);
4248}
4249
4250extern __inline __m512i
4251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4252_mm512_set1_epi64 (long long __A)
4253{
43373412 4254 return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4255}
4256
4257extern __inline __m512i
4258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4260{
756c5857
AI
4261 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4262 __M);
756c5857
AI
4263}
4264
4265extern __inline __m512i
4266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4268{
756c5857
AI
4269 return (__m512i)
4270 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4271 (__v8di) _mm512_setzero_si512 (),
4272 __M);
756c5857
AI
4273}
4274
4275extern __inline __m512
4276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4277_mm512_broadcast_f32x4 (__m128 __A)
4278{
0b192937
UD
4279 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4280 (__v16sf)
4281 _mm512_undefined_ps (),
756c5857
AI
4282 (__mmask16) -1);
4283}
4284
4285extern __inline __m512
4286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4287_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4288{
4289 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4290 (__v16sf) __O,
4291 __M);
4292}
4293
4294extern __inline __m512
4295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4296_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4297{
4298 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4299 (__v16sf)
4300 _mm512_setzero_ps (),
4301 __M);
4302}
4303
4304extern __inline __m512i
4305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4306_mm512_broadcast_i32x4 (__m128i __A)
4307{
756c5857 4308 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 4309 (__v16si)
4271e5cb 4310 _mm512_undefined_epi32 (),
756c5857
AI
4311 (__mmask16) -1);
4312}
4313
4314extern __inline __m512i
4315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4317{
4318 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4319 (__v16si) __O,
4320 __M);
4321}
4322
4323extern __inline __m512i
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4326{
4327 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4328 (__v16si)
4329 _mm512_setzero_si512 (),
4330 __M);
4331}
4332
4333extern __inline __m512d
4334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4335_mm512_broadcast_f64x4 (__m256d __A)
4336{
756c5857 4337 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
4338 (__v8df)
4339 _mm512_undefined_pd (),
756c5857
AI
4340 (__mmask8) -1);
4341}
4342
4343extern __inline __m512d
4344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4345_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4346{
4347 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4348 (__v8df) __O,
4349 __M);
4350}
4351
4352extern __inline __m512d
4353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4354_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4355{
4356 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4357 (__v8df)
4358 _mm512_setzero_pd (),
4359 __M);
4360}
4361
4362extern __inline __m512i
4363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364_mm512_broadcast_i64x4 (__m256i __A)
4365{
756c5857 4366 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 4367 (__v8di)
4271e5cb 4368 _mm512_undefined_epi32 (),
756c5857
AI
4369 (__mmask8) -1);
4370}
4371
4372extern __inline __m512i
4373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4375{
4376 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4377 (__v8di) __O,
4378 __M);
4379}
4380
4381extern __inline __m512i
4382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4383_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4384{
4385 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4386 (__v8di)
4387 _mm512_setzero_si512 (),
4388 __M);
4389}
4390
/* Named 8-bit permute selectors.  As the values below show, each of the
   four letters encodes a 2-bit field (A = 0, B = 1, C = 2, D = 3), with
   the leftmost letter in bits 7:6 and the rightmost in bits 1:0.  The
   table is laid out one row per three-letter prefix.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4480
4481#ifdef __OPTIMIZE__
4482extern __inline __m512i
4483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4484_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4485{
4486 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4487 __mask,
4488 (__v16si)
4271e5cb 4489 _mm512_undefined_epi32 (),
756c5857
AI
4490 (__mmask16) -1);
4491}
4492
4493extern __inline __m512i
4494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4495_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4496 _MM_PERM_ENUM __mask)
4497{
4498 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4499 __mask,
4500 (__v16si) __W,
4501 (__mmask16) __U);
4502}
4503
4504extern __inline __m512i
4505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4506_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4507{
4508 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4509 __mask,
4510 (__v16si)
4511 _mm512_setzero_si512 (),
4512 (__mmask16) __U);
4513}
4514
4515extern __inline __m512i
4516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4517_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4518{
4519 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4520 (__v8di) __B, __imm,
4521 (__v8di)
4271e5cb 4522 _mm512_undefined_epi32 (),
756c5857
AI
4523 (__mmask8) -1);
4524}
4525
4526extern __inline __m512i
4527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4529 __m512i __B, const int __imm)
4530{
4531 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4532 (__v8di) __B, __imm,
4533 (__v8di) __W,
4534 (__mmask8) __U);
4535}
4536
4537extern __inline __m512i
4538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4539_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4540 const int __imm)
4541{
4542 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4543 (__v8di) __B, __imm,
4544 (__v8di)
4545 _mm512_setzero_si512 (),
4546 (__mmask8) __U);
4547}
4548
4549extern __inline __m512i
4550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4551_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4552{
4553 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4554 (__v16si) __B,
4555 __imm,
4556 (__v16si)
4271e5cb 4557 _mm512_undefined_epi32 (),
756c5857
AI
4558 (__mmask16) -1);
4559}
4560
4561extern __inline __m512i
4562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4563_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4564 __m512i __B, const int __imm)
4565{
4566 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4567 (__v16si) __B,
4568 __imm,
4569 (__v16si) __W,
4570 (__mmask16) __U);
4571}
4572
4573extern __inline __m512i
4574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4576 const int __imm)
4577{
4578 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4579 (__v16si) __B,
4580 __imm,
4581 (__v16si)
4582 _mm512_setzero_si512 (),
4583 (__mmask16) __U);
4584}
4585
4586extern __inline __m512d
4587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4589{
4590 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4591 (__v8df) __B, __imm,
4592 (__v8df)
0b192937 4593 _mm512_undefined_pd (),
756c5857
AI
4594 (__mmask8) -1);
4595}
4596
4597extern __inline __m512d
4598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4599_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4600 __m512d __B, const int __imm)
4601{
4602 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4603 (__v8df) __B, __imm,
4604 (__v8df) __W,
4605 (__mmask8) __U);
4606}
4607
4608extern __inline __m512d
4609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4610_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4611 const int __imm)
4612{
4613 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4614 (__v8df) __B, __imm,
4615 (__v8df)
4616 _mm512_setzero_pd (),
4617 (__mmask8) __U);
4618}
4619
4620extern __inline __m512
4621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4623{
4624 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4625 (__v16sf) __B, __imm,
4626 (__v16sf)
0b192937 4627 _mm512_undefined_ps (),
756c5857
AI
4628 (__mmask16) -1);
4629}
4630
4631extern __inline __m512
4632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4634 __m512 __B, const int __imm)
4635{
4636 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4637 (__v16sf) __B, __imm,
4638 (__v16sf) __W,
4639 (__mmask16) __U);
4640}
4641
4642extern __inline __m512
4643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4644_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4645 const int __imm)
4646{
4647 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4648 (__v16sf) __B, __imm,
4649 (__v16sf)
4650 _mm512_setzero_ps (),
4651 (__mmask16) __U);
4652}
4653
4654#else
/* Macro forms of the 32-bit shuffle intrinsics.  X is the source
   vector, C the selector (cast to int), W the pass-through vector and
   U the mask; everything is forwarded to
   __builtin_ia32_pshufd512_mask.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
4669
/* Macro form: passes an undefined vector and an all-ones mask to
   __builtin_ia32_shuf_i64x2_mask.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di)(__m512i)(X), \
    (__v8di)(__m512i)(Y), (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)-1))
4675
/* Macro form: W and U are forwarded to __builtin_ia32_shuf_i64x2_mask
   as its vector and mask operands.  */
#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di)(__m512i)(X), \
    (__v8di)(__m512i)(Y), (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))
4681
/* Macro form: passes _mm512_setzero_si512 () and the mask U to
   __builtin_ia32_shuf_i64x2_mask.  */
#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
    (__v8di)(__m512i)(X), \
    (__v8di)(__m512i)(Y), (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
4687
/* Macro form: passes an undefined vector and an all-ones mask to
   __builtin_ia32_shuf_i32x4_mask.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si)(__m512i)(X), \
    (__v16si)(__m512i)(Y), (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask16)-1))
4693
/* Macro form: W and U are forwarded to __builtin_ia32_shuf_i32x4_mask
   as its vector and mask operands.  */
#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si)(__m512i)(X), \
    (__v16si)(__m512i)(Y), (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))
4699
/* Macro form: passes _mm512_setzero_si512 () and the mask U to
   __builtin_ia32_shuf_i32x4_mask.  */
#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
    (__v16si)(__m512i)(X), \
    (__v16si)(__m512i)(Y), (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
4705
/* Macro form: passes an undefined vector and an all-ones mask to
   __builtin_ia32_shuf_f64x2_mask.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))
4711
/* Macro form: W and U are forwarded to __builtin_ia32_shuf_f64x2_mask
   as its vector and mask operands.  */
#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
4717
/* Macro form: passes _mm512_setzero_pd () and the mask U to
   __builtin_ia32_shuf_f64x2_mask.  */
#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
4723
/* Macro form: passes an undefined vector and an all-ones mask to
   __builtin_ia32_shuf_f32x4_mask.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)-1))
4729
/* Macro form: W and U are forwarded to __builtin_ia32_shuf_f32x4_mask
   as its vector and mask operands.  */
#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))
4735
/* Macro form: passes _mm512_setzero_ps () and the mask U to
   __builtin_ia32_shuf_f32x4_mask.  */
#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
4741#endif
4742
4743extern __inline __m512i
4744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4745_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4746{
4747 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4748 (__v16si) __B,
4749 (__v16si)
4271e5cb 4750 _mm512_undefined_epi32 (),
756c5857
AI
4751 (__mmask16) -1);
4752}
4753
4754extern __inline __m512i
4755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4756_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4757{
4758 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4759 (__v16si) __B,
4760 (__v16si) __W,
4761 (__mmask16) __U);
4762}
4763
4764extern __inline __m512i
4765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4766_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4767{
4768 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4769 (__v16si) __B,
4770 (__v16si)
4771 _mm512_setzero_si512 (),
4772 (__mmask16) __U);
4773}
4774
4775extern __inline __m512i
4776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4777_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4778{
4779 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4780 (__v16si) __B,
4781 (__v16si)
4271e5cb 4782 _mm512_undefined_epi32 (),
756c5857
AI
4783 (__mmask16) -1);
4784}
4785
4786extern __inline __m512i
4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4789{
4790 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4791 (__v16si) __B,
4792 (__v16si) __W,
4793 (__mmask16) __U);
4794}
4795
4796extern __inline __m512i
4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4799{
4800 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4801 (__v16si) __B,
4802 (__v16si)
4803 _mm512_setzero_si512 (),
4804 (__mmask16) __U);
4805}
4806
4807extern __inline __m512i
4808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4809_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4810{
4811 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4812 (__v8di) __B,
4813 (__v8di)
4271e5cb 4814 _mm512_undefined_epi32 (),
756c5857
AI
4815 (__mmask8) -1);
4816}
4817
4818extern __inline __m512i
4819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4820_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4821{
4822 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4823 (__v8di) __B,
4824 (__v8di) __W,
4825 (__mmask8) __U);
4826}
4827
4828extern __inline __m512i
4829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4830_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4831{
4832 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4833 (__v8di) __B,
4834 (__v8di)
4835 _mm512_setzero_si512 (),
4836 (__mmask8) __U);
4837}
4838
4839extern __inline __m512i
4840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4841_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4842{
4843 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4844 (__v8di) __B,
4845 (__v8di)
4271e5cb 4846 _mm512_undefined_epi32 (),
756c5857
AI
4847 (__mmask8) -1);
4848}
4849
4850extern __inline __m512i
4851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4853{
4854 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4855 (__v8di) __B,
4856 (__v8di) __W,
4857 (__mmask8) __U);
4858}
4859
4860extern __inline __m512i
4861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4862_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4863{
4864 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4865 (__v8di) __B,
4866 (__v8di)
4867 _mm512_setzero_si512 (),
4868 (__mmask8) __U);
4869}
4870
4871#ifdef __OPTIMIZE__
4872extern __inline __m256i
4873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4874_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4875{
4876 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4877 (__v8si)
0b192937 4878 _mm256_undefined_si256 (),
756c5857
AI
4879 (__mmask8) -1, __R);
4880}
4881
4882extern __inline __m256i
4883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4884_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4885 const int __R)
4886{
4887 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4888 (__v8si) __W,
4889 (__mmask8) __U, __R);
4890}
4891
4892extern __inline __m256i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4895{
4896 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4897 (__v8si)
4898 _mm256_setzero_si256 (),
4899 (__mmask8) __U, __R);
4900}
4901
4902extern __inline __m256i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4905{
4906 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4907 (__v8si)
0b192937 4908 _mm256_undefined_si256 (),
756c5857
AI
4909 (__mmask8) -1, __R);
4910}
4911
4912extern __inline __m256i
4913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4915 const int __R)
4916{
4917 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4918 (__v8si) __W,
4919 (__mmask8) __U, __R);
4920}
4921
4922extern __inline __m256i
4923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4924_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4925{
4926 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4927 (__v8si)
4928 _mm256_setzero_si256 (),
4929 (__mmask8) __U, __R);
4930}
4931#else
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvttpd2dq512_mask together with A and B.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
    A, (__v8si)_mm256_undefined_si256(), -1, B))
4934
/* Macro form: W (cast to __v8si), U and B are forwarded to
   __builtin_ia32_cvttpd2dq512_mask.  */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
    A, (__v8si)(W), U, B))
4937
/* Macro form: _mm256_setzero_si256 () and the mask U are passed to
   __builtin_ia32_cvttpd2dq512_mask.  */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
    A, (__v8si)_mm256_setzero_si256(), U, B))
4940
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvttpd2udq512_mask together with A and B.  */
#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
    A, (__v8si)_mm256_undefined_si256(), -1, B))
4943
/* Macro form: W (cast to __v8si), U and B are forwarded to
   __builtin_ia32_cvttpd2udq512_mask.  */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
    A, (__v8si)(W), U, B))
4946
/* Macro form: _mm256_setzero_si256 () and the mask U are passed to
   __builtin_ia32_cvttpd2udq512_mask.  */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
    A, (__v8si)_mm256_setzero_si256(), U, B))
4949#endif
4950
4951#ifdef __OPTIMIZE__
4952extern __inline __m256i
4953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4955{
4956 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4957 (__v8si)
0b192937 4958 _mm256_undefined_si256 (),
756c5857
AI
4959 (__mmask8) -1, __R);
4960}
4961
4962extern __inline __m256i
4963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4965 const int __R)
4966{
4967 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4968 (__v8si) __W,
4969 (__mmask8) __U, __R);
4970}
4971
4972extern __inline __m256i
4973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4974_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4975{
4976 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4977 (__v8si)
4978 _mm256_setzero_si256 (),
4979 (__mmask8) __U, __R);
4980}
4981
4982extern __inline __m256i
4983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4984_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4985{
4986 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4987 (__v8si)
0b192937 4988 _mm256_undefined_si256 (),
756c5857
AI
4989 (__mmask8) -1, __R);
4990}
4991
4992extern __inline __m256i
4993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4994_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4995 const int __R)
4996{
4997 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4998 (__v8si) __W,
4999 (__mmask8) __U, __R);
5000}
5001
5002extern __inline __m256i
5003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5004_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
5005{
5006 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
5007 (__v8si)
5008 _mm256_setzero_si256 (),
5009 (__mmask8) __U, __R);
5010}
5011#else
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvtpd2dq512_mask together with A and B.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
    A, (__v8si)_mm256_undefined_si256(), -1, B))
5014
/* Macro form: W (cast to __v8si), U and B are forwarded to
   __builtin_ia32_cvtpd2dq512_mask.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
    A, (__v8si)(W), U, B))
5017
/* Macro form: _mm256_setzero_si256 () and the mask U are passed to
   __builtin_ia32_cvtpd2dq512_mask.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
    A, (__v8si)_mm256_setzero_si256(), U, B))
5020
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvtpd2udq512_mask together with A and B.  */
#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
    A, (__v8si)_mm256_undefined_si256(), -1, B))
5023
/* Macro form: W (cast to __v8si), U and B are forwarded to
   __builtin_ia32_cvtpd2udq512_mask.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
    A, (__v8si)(W), U, B))
5026
/* Macro form: _mm256_setzero_si256 () and the mask U are passed to
   __builtin_ia32_cvtpd2udq512_mask.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
    A, (__v8si)_mm256_setzero_si256(), U, B))
5029#endif
5030
5031#ifdef __OPTIMIZE__
5032extern __inline __m512i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
5035{
5036 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
5037 (__v16si)
4271e5cb 5038 _mm512_undefined_epi32 (),
756c5857
AI
5039 (__mmask16) -1, __R);
5040}
5041
5042extern __inline __m512i
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
5045 const int __R)
5046{
5047 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
5048 (__v16si) __W,
5049 (__mmask16) __U, __R);
5050}
5051
5052extern __inline __m512i
5053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
5055{
5056 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
5057 (__v16si)
5058 _mm512_setzero_si512 (),
5059 (__mmask16) __U, __R);
5060}
5061
5062extern __inline __m512i
5063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
5065{
5066 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5067 (__v16si)
4271e5cb 5068 _mm512_undefined_epi32 (),
756c5857
AI
5069 (__mmask16) -1, __R);
5070}
5071
5072extern __inline __m512i
5073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5074_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
5075 const int __R)
5076{
5077 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5078 (__v16si) __W,
5079 (__mmask16) __U, __R);
5080}
5081
5082extern __inline __m512i
5083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5084_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
5085{
5086 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
5087 (__v16si)
5088 _mm512_setzero_si512 (),
5089 (__mmask16) __U, __R);
5090}
5091#else
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvttps2dq512_mask together with A and B.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
    A, (__v16si)_mm512_undefined_epi32 (), -1, B))
5094
/* Macro form: W (cast to __v16si), U and B are forwarded to
   __builtin_ia32_cvttps2dq512_mask.  */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
    A, (__v16si)(W), U, B))
5097
/* Macro form: _mm512_setzero_si512 () and the mask U are passed to
   __builtin_ia32_cvttps2dq512_mask.  */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
    A, (__v16si)_mm512_setzero_si512 (), U, B))
5100
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvttps2udq512_mask together with A and B.  */
#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
    A, (__v16si)_mm512_undefined_epi32 (), -1, B))
5103
/* Macro form: W (cast to __v16si), U and B are forwarded to
   __builtin_ia32_cvttps2udq512_mask.  */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
    A, (__v16si)(W), U, B))
5106
/* Macro form: _mm512_setzero_si512 () and the mask U are passed to
   __builtin_ia32_cvttps2udq512_mask.  */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
    A, (__v16si)_mm512_setzero_si512 (), U, B))
5109#endif
5110
5111#ifdef __OPTIMIZE__
5112extern __inline __m512i
5113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
5115{
5116 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5117 (__v16si)
4271e5cb 5118 _mm512_undefined_epi32 (),
756c5857
AI
5119 (__mmask16) -1, __R);
5120}
5121
5122extern __inline __m512i
5123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
5125 const int __R)
5126{
5127 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5128 (__v16si) __W,
5129 (__mmask16) __U, __R);
5130}
5131
5132extern __inline __m512i
5133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5134_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
5135{
5136 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5137 (__v16si)
5138 _mm512_setzero_si512 (),
5139 (__mmask16) __U, __R);
5140}
5141
5142extern __inline __m512i
5143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5144_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
5145{
5146 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5147 (__v16si)
4271e5cb 5148 _mm512_undefined_epi32 (),
756c5857
AI
5149 (__mmask16) -1, __R);
5150}
5151
5152extern __inline __m512i
5153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
5155 const int __R)
5156{
5157 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5158 (__v16si) __W,
5159 (__mmask16) __U, __R);
5160}
5161
5162extern __inline __m512i
5163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5164_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
5165{
5166 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5167 (__v16si)
5168 _mm512_setzero_si512 (),
5169 (__mmask16) __U, __R);
5170}
5171#else
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvtps2dq512_mask together with A and B.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
    A, (__v16si)_mm512_undefined_epi32 (), -1, B))
5174
/* Macro form: W (cast to __v16si), U and B are forwarded to
   __builtin_ia32_cvtps2dq512_mask.  */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
    A, (__v16si)(W), U, B))
5177
/* Macro form: _mm512_setzero_si512 () and the mask U are passed to
   __builtin_ia32_cvtps2dq512_mask.  */
#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
    A, (__v16si)_mm512_setzero_si512 (), U, B))
5180
/* Macro form: undefined vector and mask -1 are passed to
   __builtin_ia32_cvtps2udq512_mask together with A and B.  */
#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
    A, (__v16si)_mm512_undefined_epi32 (), -1, B))
5183
/* Macro form: W (cast to __v16si), U and B are forwarded to
   __builtin_ia32_cvtps2udq512_mask.  */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
    A, (__v16si)(W), U, B))
5186
/* Macro form: _mm512_setzero_si512 () and the mask U are passed to
   __builtin_ia32_cvtps2udq512_mask.  */
#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
    A, (__v16si)_mm512_setzero_si512 (), U, B))
5189#endif
5190
5191extern __inline __m128d
5192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5193_mm_cvtu32_sd (__m128d __A, unsigned __B)
5194{
5195 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5196}
5197
5198#ifdef __x86_64__
5199#ifdef __OPTIMIZE__
5200extern __inline __m128d
5201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5202_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
5203{
5204 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
5205}
5206
5207extern __inline __m128d
5208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
5210{
5211 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5212}
5213
5214extern __inline __m128d
5215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5216_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
5217{
5218 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5219}
5220#else
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtusi2sd64
   and the result is cast to __m128d.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))
5223
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2sd64
   and the result is cast to __m128d.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
5226
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2sd64
   and the result is cast to __m128d.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
5229#endif
5230
5231#endif
5232
5233#ifdef __OPTIMIZE__
5234extern __inline __m128
5235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5236_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
5237{
5238 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
5239}
5240
5241extern __inline __m128
5242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
5244{
5245 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5246}
5247
5248extern __inline __m128
5249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
5251{
5252 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5253}
5254#else
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtusi2ss32
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))
5257
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2ss32
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
5260
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2ss32
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
5263#endif
5264
5265#ifdef __x86_64__
5266#ifdef __OPTIMIZE__
5267extern __inline __m128
5268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
5270{
5271 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
5272}
5273
5274extern __inline __m128
5275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5276_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
5277{
5278 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5279}
5280
5281extern __inline __m128
5282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
5284{
5285 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5286}
5287#else
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtusi2ss64
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))
5290
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2ss64
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
5293
/* Macro form: A, B and C are forwarded to __builtin_ia32_cvtsi2ss64
   and the result is cast to __m128.  The whole expansion is
   parenthesized so that a postfix operator written after the macro
   call applies to the cast result, as it does with the
   inline-function form, rather than to the builtin call.  */
#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
5296#endif
5297
5298#endif
5299
5300extern __inline __m128i
5301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5302_mm512_cvtepi32_epi8 (__m512i __A)
5303{
0b192937
UD
5304 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5305 (__v16qi)
5306 _mm_undefined_si128 (),
756c5857
AI
5307 (__mmask16) -1);
5308}
5309
d256b866
IT
5310extern __inline void
5311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5312_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5313{
5314 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5315}
5316
756c5857
AI
5317extern __inline __m128i
5318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5319_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5320{
5321 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5322 (__v16qi) __O, __M);
5323}
5324
5325extern __inline __m128i
5326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5328{
5329 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5330 (__v16qi)
5331 _mm_setzero_si128 (),
5332 __M);
5333}
5334
5335extern __inline __m128i
5336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337_mm512_cvtsepi32_epi8 (__m512i __A)
5338{
0b192937
UD
5339 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5340 (__v16qi)
5341 _mm_undefined_si128 (),
756c5857
AI
5342 (__mmask16) -1);
5343}
5344
d256b866
IT
5345extern __inline void
5346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5348{
5349 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5350}
5351
756c5857
AI
5352extern __inline __m128i
5353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5354_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5355{
5356 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5357 (__v16qi) __O, __M);
5358}
5359
5360extern __inline __m128i
5361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5363{
5364 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5365 (__v16qi)
5366 _mm_setzero_si128 (),
5367 __M);
5368}
5369
5370extern __inline __m128i
5371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372_mm512_cvtusepi32_epi8 (__m512i __A)
5373{
0b192937
UD
5374 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5375 (__v16qi)
5376 _mm_undefined_si128 (),
756c5857
AI
5377 (__mmask16) -1);
5378}
5379
d256b866
IT
5380extern __inline void
5381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5383{
5384 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5385}
5386
756c5857
AI
5387extern __inline __m128i
5388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5390{
5391 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5392 (__v16qi) __O,
5393 __M);
5394}
5395
5396extern __inline __m128i
5397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5399{
5400 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5401 (__v16qi)
5402 _mm_setzero_si128 (),
5403 __M);
5404}
5405
5406extern __inline __m256i
5407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5408_mm512_cvtepi32_epi16 (__m512i __A)
5409{
0b192937
UD
5410 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5411 (__v16hi)
5412 _mm256_undefined_si256 (),
756c5857
AI
5413 (__mmask16) -1);
5414}
5415
d256b866
IT
5416extern __inline void
5417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5419{
5420 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5421}
5422
756c5857
AI
5423extern __inline __m256i
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5426{
5427 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5428 (__v16hi) __O, __M);
5429}
5430
5431extern __inline __m256i
5432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5434{
5435 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5436 (__v16hi)
5437 _mm256_setzero_si256 (),
5438 __M);
5439}
5440
5441extern __inline __m256i
5442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443_mm512_cvtsepi32_epi16 (__m512i __A)
5444{
0b192937
UD
5445 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5446 (__v16hi)
5447 _mm256_undefined_si256 (),
756c5857
AI
5448 (__mmask16) -1);
5449}
5450
d256b866
IT
5451extern __inline void
5452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5454{
5455 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5456}
5457
756c5857
AI
5458extern __inline __m256i
5459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5461{
5462 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5463 (__v16hi) __O, __M);
5464}
5465
5466extern __inline __m256i
5467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5468_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5469{
5470 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5471 (__v16hi)
5472 _mm256_setzero_si256 (),
5473 __M);
5474}
5475
5476extern __inline __m256i
5477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478_mm512_cvtusepi32_epi16 (__m512i __A)
5479{
0b192937
UD
5480 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5481 (__v16hi)
5482 _mm256_undefined_si256 (),
756c5857
AI
5483 (__mmask16) -1);
5484}
5485
d256b866
IT
5486extern __inline void
5487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5489{
5490 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5491}
5492
756c5857
AI
5493extern __inline __m256i
5494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5495_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5496{
5497 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5498 (__v16hi) __O,
5499 __M);
5500}
5501
5502extern __inline __m256i
5503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5504_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5505{
5506 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5507 (__v16hi)
5508 _mm256_setzero_si256 (),
5509 __M);
5510}
5511
5512extern __inline __m256i
5513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5514_mm512_cvtepi64_epi32 (__m512i __A)
5515{
0b192937
UD
5516 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5517 (__v8si)
5518 _mm256_undefined_si256 (),
756c5857
AI
5519 (__mmask8) -1);
5520}
5521
d256b866
IT
5522extern __inline void
5523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5525{
5526 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5527}
5528
756c5857
AI
5529extern __inline __m256i
5530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5531_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5532{
5533 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5534 (__v8si) __O, __M);
5535}
5536
5537extern __inline __m256i
5538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5539_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5540{
5541 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5542 (__v8si)
5543 _mm256_setzero_si256 (),
5544 __M);
5545}
5546
5547extern __inline __m256i
5548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5549_mm512_cvtsepi64_epi32 (__m512i __A)
5550{
0b192937
UD
5551 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5552 (__v8si)
5553 _mm256_undefined_si256 (),
756c5857
AI
5554 (__mmask8) -1);
5555}
5556
d256b866
IT
5557extern __inline void
5558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5559_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5560{
5561 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5562}
5563
756c5857
AI
5564extern __inline __m256i
5565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5567{
5568 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5569 (__v8si) __O, __M);
5570}
5571
5572extern __inline __m256i
5573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5574_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5575{
5576 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5577 (__v8si)
5578 _mm256_setzero_si256 (),
5579 __M);
5580}
5581
5582extern __inline __m256i
5583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5584_mm512_cvtusepi64_epi32 (__m512i __A)
5585{
0b192937
UD
5586 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5587 (__v8si)
5588 _mm256_undefined_si256 (),
756c5857
AI
5589 (__mmask8) -1);
5590}
5591
6fb82517 5592extern __inline void
d256b866
IT
5593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5595{
5596 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5597}
5598
756c5857
AI
5599extern __inline __m256i
5600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5602{
5603 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5604 (__v8si) __O, __M);
5605}
5606
5607extern __inline __m256i
5608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5609_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5610{
5611 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5612 (__v8si)
5613 _mm256_setzero_si256 (),
5614 __M);
5615}
5616
5617extern __inline __m128i
5618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619_mm512_cvtepi64_epi16 (__m512i __A)
5620{
0b192937
UD
5621 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5622 (__v8hi)
5623 _mm_undefined_si128 (),
756c5857
AI
5624 (__mmask8) -1);
5625}
5626
d256b866
IT
5627extern __inline void
5628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5630{
5631 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5632}
5633
756c5857
AI
5634extern __inline __m128i
5635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5637{
5638 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5639 (__v8hi) __O, __M);
5640}
5641
5642extern __inline __m128i
5643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5645{
5646 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5647 (__v8hi)
5648 _mm_setzero_si128 (),
5649 __M);
5650}
5651
5652extern __inline __m128i
5653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5654_mm512_cvtsepi64_epi16 (__m512i __A)
5655{
0b192937
UD
5656 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5657 (__v8hi)
5658 _mm_undefined_si128 (),
756c5857
AI
5659 (__mmask8) -1);
5660}
5661
d256b866
IT
5662extern __inline void
5663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5665{
5666 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5667}
5668
756c5857
AI
5669extern __inline __m128i
5670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5672{
5673 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5674 (__v8hi) __O, __M);
5675}
5676
5677extern __inline __m128i
5678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5680{
5681 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5682 (__v8hi)
5683 _mm_setzero_si128 (),
5684 __M);
5685}
5686
5687extern __inline __m128i
5688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5689_mm512_cvtusepi64_epi16 (__m512i __A)
5690{
0b192937
UD
5691 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5692 (__v8hi)
5693 _mm_undefined_si128 (),
756c5857
AI
5694 (__mmask8) -1);
5695}
5696
d256b866
IT
5697extern __inline void
5698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5700{
5701 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5702}
5703
756c5857
AI
5704extern __inline __m128i
5705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5706_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5707{
5708 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5709 (__v8hi) __O, __M);
5710}
5711
5712extern __inline __m128i
5713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5714_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5715{
5716 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5717 (__v8hi)
5718 _mm_setzero_si128 (),
5719 __M);
5720}
5721
5722extern __inline __m128i
5723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5724_mm512_cvtepi64_epi8 (__m512i __A)
5725{
0b192937
UD
5726 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5727 (__v16qi)
5728 _mm_undefined_si128 (),
756c5857
AI
5729 (__mmask8) -1);
5730}
5731
d256b866
IT
5732extern __inline void
5733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5734_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5735{
4a948703 5736 __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
5737 (__v8di) __A, __M);
d256b866
IT
5738}
5739
756c5857
AI
5740extern __inline __m128i
5741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5742_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5743{
5744 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5745 (__v16qi) __O, __M);
5746}
5747
5748extern __inline __m128i
5749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5750_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5751{
5752 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5753 (__v16qi)
5754 _mm_setzero_si128 (),
5755 __M);
5756}
5757
5758extern __inline __m128i
5759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5760_mm512_cvtsepi64_epi8 (__m512i __A)
5761{
0b192937
UD
5762 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5763 (__v16qi)
5764 _mm_undefined_si128 (),
756c5857
AI
5765 (__mmask8) -1);
5766}
5767
d256b866
IT
5768extern __inline void
5769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5770_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5771{
4a948703 5772 __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5773}
5774
756c5857
AI
5775extern __inline __m128i
5776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5777_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5778{
5779 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5780 (__v16qi) __O, __M);
5781}
5782
5783extern __inline __m128i
5784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5785_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5786{
5787 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5788 (__v16qi)
5789 _mm_setzero_si128 (),
5790 __M);
5791}
5792
5793extern __inline __m128i
5794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5795_mm512_cvtusepi64_epi8 (__m512i __A)
5796{
0b192937
UD
5797 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5798 (__v16qi)
5799 _mm_undefined_si128 (),
756c5857
AI
5800 (__mmask8) -1);
5801}
5802
d256b866
IT
5803extern __inline void
5804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5805_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5806{
4a948703 5807 __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5808}
5809
756c5857
AI
5810extern __inline __m128i
5811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5813{
5814 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5815 (__v16qi) __O,
5816 __M);
5817}
5818
5819extern __inline __m128i
5820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5822{
5823 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5824 (__v16qi)
5825 _mm_setzero_si128 (),
5826 __M);
5827}
5828
5829extern __inline __m512d
5830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5831_mm512_cvtepi32_pd (__m256i __A)
5832{
5833 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5834 (__v8df)
0b192937 5835 _mm512_undefined_pd (),
756c5857
AI
5836 (__mmask8) -1);
5837}
5838
5839extern __inline __m512d
5840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5841_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5842{
5843 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5844 (__v8df) __W,
5845 (__mmask8) __U);
5846}
5847
5848extern __inline __m512d
5849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5851{
5852 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5853 (__v8df)
5854 _mm512_setzero_pd (),
5855 (__mmask8) __U);
5856}
5857
5858extern __inline __m512d
5859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5860_mm512_cvtepu32_pd (__m256i __A)
5861{
5862 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5863 (__v8df)
0b192937 5864 _mm512_undefined_pd (),
756c5857
AI
5865 (__mmask8) -1);
5866}
5867
5868extern __inline __m512d
5869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5870_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5871{
5872 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5873 (__v8df) __W,
5874 (__mmask8) __U);
5875}
5876
5877extern __inline __m512d
5878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5879_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5880{
5881 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5882 (__v8df)
5883 _mm512_setzero_pd (),
5884 (__mmask8) __U);
5885}
5886
5887#ifdef __OPTIMIZE__
5888extern __inline __m512
5889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5890_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5891{
5892 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5893 (__v16sf)
0b192937 5894 _mm512_undefined_ps (),
756c5857
AI
5895 (__mmask16) -1, __R);
5896}
5897
5898extern __inline __m512
5899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5900_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5901 const int __R)
5902{
5903 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5904 (__v16sf) __W,
5905 (__mmask16) __U, __R);
5906}
5907
5908extern __inline __m512
5909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5910_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5911{
5912 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5913 (__v16sf)
5914 _mm512_setzero_ps (),
5915 (__mmask16) __U, __R);
5916}
5917
5918extern __inline __m512
5919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5920_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5921{
5922 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5923 (__v16sf)
0b192937 5924 _mm512_undefined_ps (),
756c5857
AI
5925 (__mmask16) -1, __R);
5926}
5927
5928extern __inline __m512
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5931 const int __R)
5932{
5933 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5934 (__v16sf) __W,
5935 (__mmask16) __U, __R);
5936}
5937
5938extern __inline __m512
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5941{
5942 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5943 (__v16sf)
5944 _mm512_setzero_ps (),
5945 (__mmask16) __U, __R);
5946}
5947
5948#else
/* Non-optimizing fallback for _mm512_cvt_roundepi32_ps: A is the source
   vector and B the rounding control.  The mask is all ones, so the
   pass-through operand is undefined.  The whole expansion and B are
   parenthesized so the macro composes inside larger expressions
   (e.g. subscripting) exactly like the inline function.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i)(A), \
					      (__v16sf) _mm512_undefined_ps (), \
					      (__mmask16) -1, (B)))
756c5857
AI
5951
/* Non-optimizing fallback for _mm512_mask_cvt_roundepi32_ps: A is the
   source, W the pass-through, U the write mask, B the rounding control.
   Every argument is parenthesized and cast to the builtin's operand
   type, and the expansion is wrapped in parentheses, so the macro
   behaves like the inline function in any expression context.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i)(A), \
					      (__v16sf)(__m512)(W), \
					      (__mmask16)(U), (B)))
5954
/* Non-optimizing fallback for _mm512_maskz_cvt_roundepi32_ps: A is the
   source, U the write mask, B the rounding control; the pass-through is
   a zero vector.  Arguments and the whole expansion are parenthesized
   so the macro behaves like the inline function.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i)(A), \
					      (__v16sf) _mm512_setzero_ps (), \
					      (__mmask16)(U), (B)))
5957
/* Non-optimizing fallback for _mm512_cvt_roundepu32_ps: A is the source
   vector and B the rounding control.  The mask is all ones, so the
   pass-through operand is undefined.  The whole expansion and B are
   parenthesized so the macro composes inside larger expressions
   exactly like the inline function.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i)(A), \
					       (__v16sf) _mm512_undefined_ps (), \
					       (__mmask16) -1, (B)))
756c5857
AI
5960
/* Non-optimizing fallback for _mm512_mask_cvt_roundepu32_ps: A is the
   source, W the pass-through, U the write mask, B the rounding control.
   Every argument is parenthesized and cast to the builtin's operand
   type, and the expansion is wrapped in parentheses, so the macro
   behaves like the inline function in any expression context.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i)(A), \
					       (__v16sf)(__m512)(W), \
					       (__mmask16)(U), (B)))
5963
/* Non-optimizing fallback for _mm512_maskz_cvt_roundepu32_ps: A is the
   source, U the write mask, B the rounding control; the pass-through is
   a zero vector.  Arguments and the whole expansion are parenthesized
   so the macro behaves like the inline function.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i)(A), \
					       (__v16sf) _mm512_setzero_ps (), \
					       (__mmask16)(U), (B)))
5966#endif
5967
5968#ifdef __OPTIMIZE__
5969extern __inline __m256d
5970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5971_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5972{
5973 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5974 __imm,
5975 (__v4df)
0b192937 5976 _mm256_undefined_pd (),
756c5857
AI
5977 (__mmask8) -1);
5978}
5979
5980extern __inline __m256d
5981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5982_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5983 const int __imm)
5984{
5985 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5986 __imm,
5987 (__v4df) __W,
5988 (__mmask8) __U);
5989}
5990
5991extern __inline __m256d
5992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5993_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5994{
5995 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5996 __imm,
5997 (__v4df)
5998 _mm256_setzero_pd (),
5999 (__mmask8) __U);
6000}
6001
6002extern __inline __m128
6003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004_mm512_extractf32x4_ps (__m512 __A, const int __imm)
6005{
6006 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
6007 __imm,
6008 (__v4sf)
0b192937 6009 _mm_undefined_ps (),
756c5857
AI
6010 (__mmask8) -1);
6011}
6012
6013extern __inline __m128
6014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
6016 const int __imm)
6017{
6018 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
6019 __imm,
6020 (__v4sf) __W,
6021 (__mmask8) __U);
6022}
6023
6024extern __inline __m128
6025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6026_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
6027{
6028 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
6029 __imm,
6030 (__v4sf)
6031 _mm_setzero_ps (),
6032 (__mmask8) __U);
6033}
6034
6035extern __inline __m256i
6036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6037_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
6038{
6039 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
6040 __imm,
6041 (__v4di)
0b192937 6042 _mm256_undefined_si256 (),
756c5857
AI
6043 (__mmask8) -1);
6044}
6045
6046extern __inline __m256i
6047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6048_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
6049 const int __imm)
6050{
6051 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
6052 __imm,
6053 (__v4di) __W,
6054 (__mmask8) __U);
6055}
6056
6057extern __inline __m256i
6058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
6060{
6061 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
6062 __imm,
6063 (__v4di)
6064 _mm256_setzero_si256 (),
6065 (__mmask8) __U);
6066}
6067
6068extern __inline __m128i
6069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6070_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
6071{
6072 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6073 __imm,
6074 (__v4si)
0b192937 6075 _mm_undefined_si128 (),
756c5857
AI
6076 (__mmask8) -1);
6077}
6078
6079extern __inline __m128i
6080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6081_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
6082 const int __imm)
6083{
6084 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6085 __imm,
6086 (__v4si) __W,
6087 (__mmask8) __U);
6088}
6089
6090extern __inline __m128i
6091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
6093{
6094 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
6095 __imm,
6096 (__v4si)
6097 _mm_setzero_si128 (),
6098 (__mmask8) __U);
6099}
6100#else
6101
/* Non-optimizing fallback: X is the source, C the selector; all-ones
   mask with an undefined pass-through.  */
#define _mm512_extractf64x4_pd(X, C)					\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)_mm256_undefined_pd(), (__mmask8)-1))
6107
/* Non-optimizing fallback: X is the source, C the selector, W the
   pass-through and U the write mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)(W), (__mmask8)(U)))
6113
/* Non-optimizing fallback: X is the source, C the selector, U the write
   mask; the pass-through is a zero vector.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (				\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
6119
/* Non-optimizing fallback: X is the source, C the selector; all-ones
   mask with an undefined pass-through.  */
#define _mm512_extractf32x4_ps(X, C)					\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)_mm_undefined_ps(), (__mmask8)-1))
6125
/* Non-optimizing fallback: X is the source, C the selector, W the
   pass-through and U the write mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)(W), (__mmask8)(U)))
6131
/* Non-optimizing fallback: X is the source, C the selector, U the write
   mask; the pass-through is a zero vector.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
6137
/* Non-optimizing fallback: X is the source, C the selector; all-ones
   mask with an undefined pass-through.  */
#define _mm512_extracti64x4_epi64(X, C)					\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)_mm256_undefined_si256 (), (__mmask8)-1))
6143
/* Non-optimizing fallback: X is the source, C the selector, W the
   pass-through and U the write mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)(W), (__mmask8)(U)))
6149
/* Non-optimizing fallback: X is the source, C the selector, U the write
   mask; the pass-through is a zero vector.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (				\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
6155
/* Non-optimizing fallback: X is the source, C the selector; all-ones
   mask with an undefined pass-through.  */
#define _mm512_extracti32x4_epi32(X, C)					\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)_mm_undefined_si128 (), (__mmask8)-1))
6161
/* Non-optimizing fallback: X is the source, C the selector, W the
   pass-through and U the write mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)(W), (__mmask8)(U)))
6167
/* Non-optimizing fallback: X is the source, C the selector, U the write
   mask; the pass-through is a zero vector.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (				\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
6173#endif
6174
6175#ifdef __OPTIMIZE__
6176extern __inline __m512i
6177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6178_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6179{
6180 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6181 (__v4si) __B,
6182 __imm,
6183 (__v16si) __A, -1);
6184}
6185
6186extern __inline __m512
6187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6189{
6190 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6191 (__v4sf) __B,
6192 __imm,
6193 (__v16sf) __A, -1);
6194}
6195
6196extern __inline __m512i
6197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6199{
6200 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6201 (__v4di) __B,
6202 __imm,
6203 (__v8di)
4271e5cb 6204 _mm512_undefined_epi32 (),
756c5857
AI
6205 (__mmask8) -1);
6206}
6207
6208extern __inline __m512i
6209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6210_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6211 __m256i __B, const int __imm)
6212{
6213 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6214 (__v4di) __B,
6215 __imm,
6216 (__v8di) __W,
6217 (__mmask8) __U);
6218}
6219
6220extern __inline __m512i
6221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6223 const int __imm)
6224{
6225 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6226 (__v4di) __B,
6227 __imm,
6228 (__v8di)
6229 _mm512_setzero_si512 (),
6230 (__mmask8) __U);
6231}
6232
6233extern __inline __m512d
6234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6235_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6236{
6237 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6238 (__v4df) __B,
6239 __imm,
6240 (__v8df)
0b192937 6241 _mm512_undefined_pd (),
756c5857
AI
6242 (__mmask8) -1);
6243}
6244
6245extern __inline __m512d
6246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6247_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6248 __m256d __B, const int __imm)
6249{
6250 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6251 (__v4df) __B,
6252 __imm,
6253 (__v8df) __W,
6254 (__mmask8) __U);
6255}
6256
6257extern __inline __m512d
6258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6260 const int __imm)
6261{
6262 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6263 (__v4df) __B,
6264 __imm,
6265 (__v8df)
6266 _mm512_setzero_pd (),
6267 (__mmask8) __U);
6268}
6269#else
/* Non-optimizing fallback for _mm512_insertf32x4: insert Y into X at
   the position given by C.  The mask is all ones, so the pass-through
   operand is never used; it is an undefined vector rather than a second
   copy of X, so that X is expanded (and evaluated) exactly once, as
   with the inline-function form.  */
#define _mm512_insertf32x4(X, Y, C)					\
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),	\
    (__v4sf)(__m128) (Y), (int) (C),					\
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))
6273
/* Non-optimizing fallback for _mm512_inserti32x4: insert Y into X at
   the position given by C.  The mask is all ones, so the pass-through
   operand is never used; it is an undefined vector rather than a second
   copy of X, so that X is expanded (and evaluated) exactly once, as
   with the inline-function form.  */
#define _mm512_inserti32x4(X, Y, C)					\
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),	\
    (__v4si)(__m128i) (Y), (int) (C),					\
    (__v16si)(__m512i) _mm512_undefined_epi32 (), (__mmask16)(-1)))
6277
/* Non-optimizing fallback: insert Y into X at position C; all-ones mask
   with an undefined pass-through.  */
#define _mm512_insertf64x4(X, Y, C)					\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)_mm512_undefined_pd(), (__mmask8)-1))
6283
/* Non-optimizing fallback: insert Y into X at position C, with W as
   pass-through and U as the write mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)(W), (__mmask8)(U)))
6289
/* Non-optimizing fallback: insert Y into X at position C, with a zero
   vector as pass-through and U as the write mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U)))
6295
/* Non-optimizing fallback: insert Y into X at position C; all-ones mask
   with an undefined pass-through.  */
#define _mm512_inserti64x4(X, Y, C)					\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))
6301
/* Non-optimizing fallback: insert Y into X at position C, with W as
   pass-through and U as the write mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)(W), (__mmask8)(U)))
6307
/* Non-optimizing fallback: insert Y into X at position C, with a zero
   vector as pass-through and U as the write mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
6313#endif
6314
6315extern __inline __m512d
6316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6317_mm512_loadu_pd (void const *__P)
6318{
c6b0037d 6319 return *(__m512d_u *)__P;
756c5857
AI
6320}
6321
6322extern __inline __m512d
6323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6325{
fc9cf6da 6326 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6327 (__v8df) __W,
6328 (__mmask8) __U);
6329}
6330
6331extern __inline __m512d
6332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6333_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6334{
fc9cf6da 6335 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6336 (__v8df)
6337 _mm512_setzero_pd (),
6338 (__mmask8) __U);
6339}
6340
6341extern __inline void
6342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6343_mm512_storeu_pd (void *__P, __m512d __A)
6344{
c6b0037d 6345 *(__m512d_u *)__P = __A;
756c5857
AI
6346}
6347
6348extern __inline void
6349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6350_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6351{
fc9cf6da 6352 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
6353 (__mmask8) __U);
6354}
6355
6356extern __inline __m512
6357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6358_mm512_loadu_ps (void const *__P)
6359{
c6b0037d 6360 return *(__m512_u *)__P;
756c5857
AI
6361}
6362
6363extern __inline __m512
6364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6365_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6366{
fc9cf6da 6367 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6368 (__v16sf) __W,
6369 (__mmask16) __U);
6370}
6371
6372extern __inline __m512
6373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6375{
fc9cf6da 6376 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6377 (__v16sf)
6378 _mm512_setzero_ps (),
6379 (__mmask16) __U);
6380}
6381
6382extern __inline void
6383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6384_mm512_storeu_ps (void *__P, __m512 __A)
6385{
c6b0037d 6386 *(__m512_u *)__P = __A;
756c5857
AI
6387}
6388
6389extern __inline void
6390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6391_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6392{
fc9cf6da 6393 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
6394 (__mmask16) __U);
6395}
6396
459d21c6
JJ
6397extern __inline __m128
6398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
6400{
6401 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
6402}
6403
6404extern __inline __m128
6405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6406_mm_maskz_load_ss (__mmask8 __U, const float *__P)
6407{
6408 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
6409 __U);
6410}
6411
6412extern __inline __m128d
6413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6414_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
6415{
6416 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
6417}
6418
6419extern __inline __m128d
6420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421_mm_maskz_load_sd (__mmask8 __U, const double *__P)
6422{
6423 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
6424 __U);
6425}
6426
6427extern __inline __m128
6428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6429_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6430{
6431 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6432 (__v4sf) __W, __U);
6433}
6434
6435extern __inline __m128
6436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
6438{
6439 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6440 (__v4sf) _mm_setzero_ps (), __U);
6441}
6442
6443extern __inline __m128d
6444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6446{
6447 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6448 (__v2df) __W, __U);
6449}
6450
6451extern __inline __m128d
6452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6453_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
6454{
6455 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6456 (__v2df) _mm_setzero_pd (),
6457 __U);
6458}
6459
6460extern __inline void
6461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6462_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
6463{
6464 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
6465}
6466
6467extern __inline void
6468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
6470{
6471 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
6472}
6473
4c98bdad
SP
6474extern __inline __m512i
6475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6476_mm512_loadu_epi64 (void const *__P)
6477{
6478 return *(__m512i_u *) __P;
6479}
6480
756c5857
AI
6481extern __inline __m512i
6482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6483_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6484{
fc9cf6da 6485 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6486 (__v8di) __W,
6487 (__mmask8) __U);
6488}
6489
6490extern __inline __m512i
6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6493{
fc9cf6da 6494 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6495 (__v8di)
6496 _mm512_setzero_si512 (),
6497 (__mmask8) __U);
6498}
6499
4c98bdad
SP
6500extern __inline void
6501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502_mm512_storeu_epi64 (void *__P, __m512i __A)
6503{
6504 *(__m512i_u *) __P = (__m512i_u) __A;
6505}
6506
756c5857
AI
6507extern __inline void
6508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6509_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6510{
fc9cf6da 6511 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
6512 (__mmask8) __U);
6513}
6514
6515extern __inline __m512i
6516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6517_mm512_loadu_si512 (void const *__P)
756c5857 6518{
c6b0037d 6519 return *(__m512i_u *)__P;
756c5857
AI
6520}
6521
4c98bdad
SP
6522extern __inline __m512i
6523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6524_mm512_loadu_epi32 (void const *__P)
6525{
6526 return *(__m512i_u *) __P;
6527}
6528
756c5857
AI
6529extern __inline __m512i
6530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6531_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6532{
fc9cf6da 6533 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6534 (__v16si) __W,
6535 (__mmask16) __U);
6536}
6537
6538extern __inline __m512i
6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6541{
fc9cf6da 6542 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6543 (__v16si)
6544 _mm512_setzero_si512 (),
6545 (__mmask16) __U);
6546}
6547
6548extern __inline void
6549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6550_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 6551{
c6b0037d 6552 *(__m512i_u *)__P = __A;
756c5857
AI
6553}
6554
4c98bdad
SP
6555extern __inline void
6556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6557_mm512_storeu_epi32 (void *__P, __m512i __A)
6558{
6559 *(__m512i_u *) __P = (__m512i_u) __A;
6560}
6561
756c5857
AI
6562extern __inline void
6563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6564_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6565{
fc9cf6da 6566 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
6567 (__mmask16) __U);
6568}
6569
6570extern __inline __m512d
6571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6572_mm512_permutevar_pd (__m512d __A, __m512i __C)
6573{
6574 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6575 (__v8di) __C,
6576 (__v8df)
0b192937 6577 _mm512_undefined_pd (),
756c5857
AI
6578 (__mmask8) -1);
6579}
6580
6581extern __inline __m512d
6582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6583_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6584{
6585 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6586 (__v8di) __C,
6587 (__v8df) __W,
6588 (__mmask8) __U);
6589}
6590
6591extern __inline __m512d
6592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6594{
6595 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6596 (__v8di) __C,
6597 (__v8df)
6598 _mm512_setzero_pd (),
6599 (__mmask8) __U);
6600}
6601
6602extern __inline __m512
6603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6604_mm512_permutevar_ps (__m512 __A, __m512i __C)
6605{
6606 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6607 (__v16si) __C,
6608 (__v16sf)
0b192937 6609 _mm512_undefined_ps (),
756c5857
AI
6610 (__mmask16) -1);
6611}
6612
6613extern __inline __m512
6614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6616{
6617 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6618 (__v16si) __C,
6619 (__v16sf) __W,
6620 (__mmask16) __U);
6621}
6622
6623extern __inline __m512
6624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6625_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6626{
6627 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6628 (__v16si) __C,
6629 (__v16sf)
6630 _mm512_setzero_ps (),
6631 (__mmask16) __U);
6632}
6633
6634extern __inline __m512i
6635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6636_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6637{
6638 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6639 /* idx */ ,
6640 (__v8di) __A,
6641 (__v8di) __B,
6642 (__mmask8) -1);
6643}
6644
6645extern __inline __m512i
6646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6647_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6648 __m512i __B)
6649{
6650 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6651 /* idx */ ,
6652 (__v8di) __A,
6653 (__v8di) __B,
6654 (__mmask8) __U);
6655}
6656
6657extern __inline __m512i
6658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6659_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6660 __mmask8 __U, __m512i __B)
6661{
6662 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6663 (__v8di) __I
6664 /* idx */ ,
6665 (__v8di) __B,
6666 (__mmask8) __U);
6667}
6668
6669extern __inline __m512i
6670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6671_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6672 __m512i __I, __m512i __B)
6673{
6674 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6675 /* idx */ ,
6676 (__v8di) __A,
6677 (__v8di) __B,
6678 (__mmask8) __U);
6679}
6680
6681extern __inline __m512i
6682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6683_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6684{
6685 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6686 /* idx */ ,
6687 (__v16si) __A,
6688 (__v16si) __B,
6689 (__mmask16) -1);
6690}
6691
6692extern __inline __m512i
6693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6694_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6695 __m512i __I, __m512i __B)
6696{
6697 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6698 /* idx */ ,
6699 (__v16si) __A,
6700 (__v16si) __B,
6701 (__mmask16) __U);
6702}
6703
6704extern __inline __m512i
6705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6706_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6707 __mmask16 __U, __m512i __B)
6708{
6709 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6710 (__v16si) __I
6711 /* idx */ ,
6712 (__v16si) __B,
6713 (__mmask16) __U);
6714}
6715
6716extern __inline __m512i
6717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6718_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6719 __m512i __I, __m512i __B)
6720{
6721 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6722 /* idx */ ,
6723 (__v16si) __A,
6724 (__v16si) __B,
6725 (__mmask16) __U);
6726}
6727
6728extern __inline __m512d
6729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6731{
6732 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6733 /* idx */ ,
6734 (__v8df) __A,
6735 (__v8df) __B,
6736 (__mmask8) -1);
6737}
6738
6739extern __inline __m512d
6740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6741_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6742 __m512d __B)
6743{
6744 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6745 /* idx */ ,
6746 (__v8df) __A,
6747 (__v8df) __B,
6748 (__mmask8) __U);
6749}
6750
6751extern __inline __m512d
6752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6754 __m512d __B)
6755{
6756 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6757 (__v8di) __I
6758 /* idx */ ,
6759 (__v8df) __B,
6760 (__mmask8) __U);
6761}
6762
6763extern __inline __m512d
6764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6765_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6766 __m512d __B)
6767{
6768 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6769 /* idx */ ,
6770 (__v8df) __A,
6771 (__v8df) __B,
6772 (__mmask8) __U);
6773}
6774
6775extern __inline __m512
6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6778{
6779 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6780 /* idx */ ,
6781 (__v16sf) __A,
6782 (__v16sf) __B,
6783 (__mmask16) -1);
6784}
6785
6786extern __inline __m512
6787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6789{
6790 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6791 /* idx */ ,
6792 (__v16sf) __A,
6793 (__v16sf) __B,
6794 (__mmask16) __U);
6795}
6796
6797extern __inline __m512
6798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6800 __m512 __B)
6801{
6802 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6803 (__v16si) __I
6804 /* idx */ ,
6805 (__v16sf) __B,
6806 (__mmask16) __U);
6807}
6808
6809extern __inline __m512
6810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6811_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6812 __m512 __B)
6813{
6814 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6815 /* idx */ ,
6816 (__v16sf) __A,
6817 (__v16sf) __B,
6818 (__mmask16) __U);
6819}
6820
6821#ifdef __OPTIMIZE__
6822extern __inline __m512d
6823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6824_mm512_permute_pd (__m512d __X, const int __C)
6825{
6826 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6827 (__v8df)
0b192937 6828 _mm512_undefined_pd (),
756c5857
AI
6829 (__mmask8) -1);
6830}
6831
6832extern __inline __m512d
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6835{
6836 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6837 (__v8df) __W,
6838 (__mmask8) __U);
6839}
6840
6841extern __inline __m512d
6842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6843_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6844{
6845 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6846 (__v8df)
6847 _mm512_setzero_pd (),
6848 (__mmask8) __U);
6849}
6850
6851extern __inline __m512
6852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6853_mm512_permute_ps (__m512 __X, const int __C)
6854{
6855 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6856 (__v16sf)
0b192937 6857 _mm512_undefined_ps (),
756c5857
AI
6858 (__mmask16) -1);
6859}
6860
6861extern __inline __m512
6862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6863_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6864{
6865 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6866 (__v16sf) __W,
6867 (__mmask16) __U);
6868}
6869
6870extern __inline __m512
6871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6872_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6873{
6874 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6875 (__v16sf)
6876 _mm512_setzero_ps (),
6877 (__mmask16) __U);
6878}
6879#else
6880#define _mm512_permute_pd(X, C) \
6881 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
0b192937 6882 (__v8df)(__m512d)_mm512_undefined_pd(),\
756c5857
AI
6883 (__mmask8)(-1)))
6884
6885#define _mm512_mask_permute_pd(W, U, X, C) \
6886 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6887 (__v8df)(__m512d)(W), \
6888 (__mmask8)(U)))
6889
6890#define _mm512_maskz_permute_pd(U, X, C) \
6891 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6892 (__v8df)(__m512d)_mm512_setzero_pd(), \
6893 (__mmask8)(U)))
6894
6895#define _mm512_permute_ps(X, C) \
6896 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
0b192937 6897 (__v16sf)(__m512)_mm512_undefined_ps(),\
756c5857
AI
6898 (__mmask16)(-1)))
6899
6900#define _mm512_mask_permute_ps(W, U, X, C) \
6901 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6902 (__v16sf)(__m512)(W), \
6903 (__mmask16)(U)))
6904
6905#define _mm512_maskz_permute_ps(U, X, C) \
6906 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6907 (__v16sf)(__m512)_mm512_setzero_ps(), \
6908 (__mmask16)(U)))
6909#endif
6910
6911#ifdef __OPTIMIZE__
6912extern __inline __m512i
6913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6914_mm512_permutex_epi64 (__m512i __X, const int __I)
6915{
6916 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6917 (__v8di)
4271e5cb 6918 _mm512_undefined_epi32 (),
756c5857
AI
6919 (__mmask8) (-1));
6920}
6921
6922extern __inline __m512i
6923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6924_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6925 __m512i __X, const int __I)
6926{
6927 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6928 (__v8di) __W,
6929 (__mmask8) __M);
6930}
6931
6932extern __inline __m512i
6933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6934_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6935{
6936 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6937 (__v8di)
6938 _mm512_setzero_si512 (),
6939 (__mmask8) __M);
6940}
6941
6942extern __inline __m512d
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm512_permutex_pd (__m512d __X, const int __M)
6945{
6946 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6947 (__v8df)
0b192937 6948 _mm512_undefined_pd (),
756c5857
AI
6949 (__mmask8) -1);
6950}
6951
6952extern __inline __m512d
6953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6954_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6955{
6956 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6957 (__v8df) __W,
6958 (__mmask8) __U);
6959}
6960
6961extern __inline __m512d
6962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6964{
6965 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6966 (__v8df)
6967 _mm512_setzero_pd (),
6968 (__mmask8) __U);
6969}
6970#else
6971#define _mm512_permutex_pd(X, M) \
6972 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
0b192937
UD
6973 (__v8df)(__m512d)_mm512_undefined_pd(),\
6974 (__mmask8)-1))
756c5857
AI
6975
6976#define _mm512_mask_permutex_pd(W, U, X, M) \
6977 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6978 (__v8df)(__m512d)(W), (__mmask8)(U)))
6979
6980#define _mm512_maskz_permutex_pd(U, X, M) \
6981 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6982 (__v8df)(__m512d)_mm512_setzero_pd(),\
6983 (__mmask8)(U)))
6984
6985#define _mm512_permutex_epi64(X, I) \
6986 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6987 (int)(I), \
0b192937 6988 (__v8di)(__m512i) \
4271e5cb 6989 (_mm512_undefined_epi32 ()),\
756c5857
AI
6990 (__mmask8)(-1)))
6991
6992#define _mm512_maskz_permutex_epi64(M, X, I) \
6993 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6994 (int)(I), \
6995 (__v8di)(__m512i) \
6996 (_mm512_setzero_si512 ()),\
6997 (__mmask8)(M)))
6998
6999#define _mm512_mask_permutex_epi64(W, M, X, I) \
7000 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
7001 (int)(I), \
7002 (__v8di)(__m512i)(W), \
7003 (__mmask8)(M)))
7004#endif
7005
7006extern __inline __m512i
7007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7008_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
7009{
583a9919
KY
7010 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7011 (__v8di) __X,
756c5857
AI
7012 (__v8di)
7013 _mm512_setzero_si512 (),
7014 __M);
7015}
7016
7017extern __inline __m512i
7018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7019_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
7020{
583a9919
KY
7021 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7022 (__v8di) __X,
756c5857 7023 (__v8di)
4271e5cb 7024 _mm512_undefined_epi32 (),
756c5857
AI
7025 (__mmask8) -1);
7026}
7027
7028extern __inline __m512i
7029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7030_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
7031 __m512i __Y)
7032{
583a9919
KY
7033 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
7034 (__v8di) __X,
756c5857
AI
7035 (__v8di) __W,
7036 __M);
7037}
7038
7039extern __inline __m512i
7040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
7042{
583a9919
KY
7043 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7044 (__v16si) __X,
756c5857
AI
7045 (__v16si)
7046 _mm512_setzero_si512 (),
7047 __M);
7048}
7049
7050extern __inline __m512i
7051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7052_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
7053{
583a9919
KY
7054 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7055 (__v16si) __X,
756c5857 7056 (__v16si)
4271e5cb 7057 _mm512_undefined_epi32 (),
756c5857
AI
7058 (__mmask16) -1);
7059}
7060
7061extern __inline __m512i
7062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7063_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
7064 __m512i __Y)
7065{
583a9919
KY
7066 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
7067 (__v16si) __X,
756c5857
AI
7068 (__v16si) __W,
7069 __M);
7070}
7071
7072extern __inline __m512d
7073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7074_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7075{
7076 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7077 (__v8di) __X,
7078 (__v8df)
0b192937 7079 _mm512_undefined_pd (),
756c5857
AI
7080 (__mmask8) -1);
7081}
7082
7083extern __inline __m512d
7084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7085_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7086{
7087 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7088 (__v8di) __X,
7089 (__v8df) __W,
7090 (__mmask8) __U);
7091}
7092
7093extern __inline __m512d
7094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7095_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7096{
7097 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
7098 (__v8di) __X,
7099 (__v8df)
7100 _mm512_setzero_pd (),
7101 (__mmask8) __U);
7102}
7103
7104extern __inline __m512
7105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
7107{
7108 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7109 (__v16si) __X,
7110 (__v16sf)
0b192937 7111 _mm512_undefined_ps (),
756c5857
AI
7112 (__mmask16) -1);
7113}
7114
7115extern __inline __m512
7116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
7118{
7119 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7120 (__v16si) __X,
7121 (__v16sf) __W,
7122 (__mmask16) __U);
7123}
7124
7125extern __inline __m512
7126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7127_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
7128{
7129 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7130 (__v16si) __X,
7131 (__v16sf)
7132 _mm512_setzero_ps (),
7133 (__mmask16) __U);
7134}
7135
7136#ifdef __OPTIMIZE__
7137extern __inline __m512
7138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7139_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
7140{
7141 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7142 (__v16sf) __V, __imm,
7143 (__v16sf)
0b192937 7144 _mm512_undefined_ps (),
756c5857
AI
7145 (__mmask16) -1);
7146}
7147
7148extern __inline __m512
7149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7150_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
7151 __m512 __V, const int __imm)
7152{
7153 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7154 (__v16sf) __V, __imm,
7155 (__v16sf) __W,
7156 (__mmask16) __U);
7157}
7158
7159extern __inline __m512
7160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7161_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
7162{
7163 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7164 (__v16sf) __V, __imm,
7165 (__v16sf)
7166 _mm512_setzero_ps (),
7167 (__mmask16) __U);
7168}
7169
7170extern __inline __m512d
7171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7172_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
7173{
7174 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7175 (__v8df) __V, __imm,
7176 (__v8df)
0b192937 7177 _mm512_undefined_pd (),
756c5857
AI
7178 (__mmask8) -1);
7179}
7180
7181extern __inline __m512d
7182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7183_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
7184 __m512d __V, const int __imm)
7185{
7186 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7187 (__v8df) __V, __imm,
7188 (__v8df) __W,
7189 (__mmask8) __U);
7190}
7191
7192extern __inline __m512d
7193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
7195 const int __imm)
7196{
7197 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7198 (__v8df) __V, __imm,
7199 (__v8df)
7200 _mm512_setzero_pd (),
7201 (__mmask8) __U);
7202}
7203
7204extern __inline __m512d
7205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7206_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
756c5857
AI
7207 const int __imm, const int __R)
7208{
040d2bba
WX
7209 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
7210 (__v8df) __B,
7211 (__v8di) __C,
756c5857 7212 __imm,
040d2bba 7213 (__mmask8) -1, __R);
756c5857
AI
7214}
7215
7216extern __inline __m512d
7217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7218_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
7219 __m512i __C, const int __imm, const int __R)
756c5857
AI
7220{
7221 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
7222 (__v8df) __B,
7223 (__v8di) __C,
756c5857
AI
7224 __imm,
7225 (__mmask8) __U, __R);
7226}
7227
7228extern __inline __m512d
7229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7230_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7231 __m512i __C, const int __imm, const int __R)
756c5857
AI
7232{
7233 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
7234 (__v8df) __B,
7235 (__v8di) __C,
756c5857
AI
7236 __imm,
7237 (__mmask8) __U, __R);
7238}
7239
7240extern __inline __m512
7241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7242_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
756c5857
AI
7243 const int __imm, const int __R)
7244{
040d2bba
WX
7245 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7246 (__v16sf) __B,
7247 (__v16si) __C,
756c5857 7248 __imm,
040d2bba 7249 (__mmask16) -1, __R);
756c5857
AI
7250}
7251
7252extern __inline __m512
7253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7254_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7255 __m512i __C, const int __imm, const int __R)
756c5857
AI
7256{
7257 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
7258 (__v16sf) __B,
7259 (__v16si) __C,
756c5857
AI
7260 __imm,
7261 (__mmask16) __U, __R);
7262}
7263
7264extern __inline __m512
7265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7266_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7267 __m512i __C, const int __imm, const int __R)
756c5857
AI
7268{
7269 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
7270 (__v16sf) __B,
7271 (__v16si) __C,
756c5857
AI
7272 __imm,
7273 (__mmask16) __U, __R);
7274}
7275
7276extern __inline __m128d
7277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7278_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
756c5857
AI
7279 const int __imm, const int __R)
7280{
040d2bba
WX
7281 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7282 (__v2df) __B,
7283 (__v2di) __C, __imm,
7284 (__mmask8) -1, __R);
756c5857
AI
7285}
7286
7287extern __inline __m128d
7288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7289_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7290 __m128i __C, const int __imm, const int __R)
756c5857
AI
7291{
7292 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
7293 (__v2df) __B,
7294 (__v2di) __C, __imm,
756c5857
AI
7295 (__mmask8) __U, __R);
7296}
7297
7298extern __inline __m128d
7299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7300_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7301 __m128i __C, const int __imm, const int __R)
756c5857
AI
7302{
7303 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
7304 (__v2df) __B,
7305 (__v2di) __C,
756c5857
AI
7306 __imm,
7307 (__mmask8) __U, __R);
7308}
7309
7310extern __inline __m128
7311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7312_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
756c5857
AI
7313 const int __imm, const int __R)
7314{
040d2bba
WX
7315 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7316 (__v4sf) __B,
7317 (__v4si) __C, __imm,
7318 (__mmask8) -1, __R);
756c5857
AI
7319}
7320
7321extern __inline __m128
7322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7323_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7324 __m128i __C, const int __imm, const int __R)
756c5857
AI
7325{
7326 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
7327 (__v4sf) __B,
7328 (__v4si) __C, __imm,
756c5857
AI
7329 (__mmask8) __U, __R);
7330}
7331
7332extern __inline __m128
7333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7334_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7335 __m128i __C, const int __imm, const int __R)
756c5857
AI
7336{
7337 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
7338 (__v4sf) __B,
7339 (__v4si) __C, __imm,
756c5857
AI
7340 (__mmask8) __U, __R);
7341}
7342
7343#else
7344#define _mm512_shuffle_pd(X, Y, C) \
7345 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7346 (__v8df)(__m512d)(Y), (int)(C),\
0b192937 7347 (__v8df)(__m512d)_mm512_undefined_pd(),\
756c5857
AI
7348 (__mmask8)-1))
7349
7350#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
7351 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7352 (__v8df)(__m512d)(Y), (int)(C),\
7353 (__v8df)(__m512d)(W),\
7354 (__mmask8)(U)))
7355
7356#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
7357 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7358 (__v8df)(__m512d)(Y), (int)(C),\
7359 (__v8df)(__m512d)_mm512_setzero_pd(),\
7360 (__mmask8)(U)))
7361
/* Unmasked form: _mm512_undefined_ps () and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)-1))

/* Mask form: W and U are passed through to the builtin.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

/* Maskz form: _mm512_setzero_ps () takes the place of W.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))

/* Macro forms of the fixupimmpd512 wrappers: X, Y, Z, C and R are forwarded
   in that order, followed by the mask.  */

/* Unmasked form: _mask builtin with an all-ones mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
    (__mmask8)(-1), (R)))

/* Mask form: _mask builtin with mask U.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
    (__mmask8)(U), (R)))

/* Maskz form: _maskz builtin with mask U.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
    (__mmask8)(U), (R)))

/* Macro forms of the fixupimmps512 wrappers: X, Y, Z, C and R are forwarded
   in that order, followed by the mask.  */

/* Unmasked form: _mask builtin with an all-ones mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
    (__mmask16)(-1), (R)))

/* Mask form: _mask builtin with mask U.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
    (__mmask16)(U), (R)))

/* Maskz form: _maskz builtin with mask U.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
    (__mmask16)(U), (R)))

/* Macro forms of the fixupimmsd wrappers: X, Y, Z, C and R are forwarded in
   that order, followed by the mask.  */

/* Unmasked form: _mask builtin with an all-ones mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
    (__mmask8)(-1), (R)))

/* Mask form: _mask builtin with mask U.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
    (__mmask8)(U), (R)))

/* Maskz form: _maskz builtin with mask U.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
    (__mmask8)(U), (R)))

040d2bba
WX
7425#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7426 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7427 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7428 (__mmask8)(-1), (R)))
756c5857 7429
040d2bba 7430#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
756c5857 7431 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
040d2bba
WX
7432 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7433 (__mmask8)(U), (R)))
756c5857 7434
040d2bba 7435#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
756c5857 7436 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
040d2bba 7437 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
756c5857
AI
7438 (__mmask8)(U), (R)))
7439#endif
7440
7441extern __inline __m512
7442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7443_mm512_movehdup_ps (__m512 __A)
7444{
7445 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7446 (__v16sf)
0b192937 7447 _mm512_undefined_ps (),
756c5857
AI
7448 (__mmask16) -1);
7449}
7450
7451extern __inline __m512
7452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7453_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7454{
7455 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7456 (__v16sf) __W,
7457 (__mmask16) __U);
7458}
7459
7460extern __inline __m512
7461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7462_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7463{
7464 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7465 (__v16sf)
7466 _mm512_setzero_ps (),
7467 (__mmask16) __U);
7468}
7469
7470extern __inline __m512
7471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7472_mm512_moveldup_ps (__m512 __A)
7473{
7474 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7475 (__v16sf)
0b192937 7476 _mm512_undefined_ps (),
756c5857
AI
7477 (__mmask16) -1);
7478}
7479
7480extern __inline __m512
7481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7483{
7484 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7485 (__v16sf) __W,
7486 (__mmask16) __U);
7487}
7488
7489extern __inline __m512
7490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7491_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7492{
7493 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7494 (__v16sf)
7495 _mm512_setzero_ps (),
7496 (__mmask16) __U);
7497}
7498
7499extern __inline __m512i
7500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501_mm512_or_si512 (__m512i __A, __m512i __B)
7502{
2069d6fc 7503 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7504}
7505
7506extern __inline __m512i
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm512_or_epi32 (__m512i __A, __m512i __B)
7509{
2069d6fc 7510 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7511}
7512
7513extern __inline __m512i
7514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7515_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7516{
7517 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7518 (__v16si) __B,
7519 (__v16si) __W,
7520 (__mmask16) __U);
7521}
7522
7523extern __inline __m512i
7524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7526{
7527 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7528 (__v16si) __B,
7529 (__v16si)
7530 _mm512_setzero_si512 (),
7531 (__mmask16) __U);
7532}
7533
7534extern __inline __m512i
7535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7536_mm512_or_epi64 (__m512i __A, __m512i __B)
7537{
2069d6fc 7538 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
7539}
7540
7541extern __inline __m512i
7542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7544{
7545 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7546 (__v8di) __B,
7547 (__v8di) __W,
7548 (__mmask8) __U);
7549}
7550
7551extern __inline __m512i
7552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7554{
7555 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7556 (__v8di) __B,
7557 (__v8di)
7558 _mm512_setzero_si512 (),
7559 (__mmask8) __U);
7560}
7561
7562extern __inline __m512i
7563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7564_mm512_xor_si512 (__m512i __A, __m512i __B)
7565{
2069d6fc 7566 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7567}
7568
7569extern __inline __m512i
7570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7571_mm512_xor_epi32 (__m512i __A, __m512i __B)
7572{
2069d6fc 7573 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7574}
7575
7576extern __inline __m512i
7577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7578_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7579{
7580 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7581 (__v16si) __B,
7582 (__v16si) __W,
7583 (__mmask16) __U);
7584}
7585
7586extern __inline __m512i
7587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7588_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7589{
7590 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7591 (__v16si) __B,
7592 (__v16si)
7593 _mm512_setzero_si512 (),
7594 (__mmask16) __U);
7595}
7596
7597extern __inline __m512i
7598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599_mm512_xor_epi64 (__m512i __A, __m512i __B)
7600{
2069d6fc 7601 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
7602}
7603
7604extern __inline __m512i
7605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7606_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7607{
7608 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7609 (__v8di) __B,
7610 (__v8di) __W,
7611 (__mmask8) __U);
7612}
7613
7614extern __inline __m512i
7615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7616_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7617{
7618 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7619 (__v8di) __B,
7620 (__v8di)
7621 _mm512_setzero_si512 (),
7622 (__mmask8) __U);
7623}
7624
7625#ifdef __OPTIMIZE__
7626extern __inline __m512i
7627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7628_mm512_rol_epi32 (__m512i __A, const int __B)
7629{
7630 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7631 (__v16si)
4271e5cb 7632 _mm512_undefined_epi32 (),
756c5857
AI
7633 (__mmask16) -1);
7634}
7635
7636extern __inline __m512i
7637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7638_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7639{
7640 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7641 (__v16si) __W,
7642 (__mmask16) __U);
7643}
7644
7645extern __inline __m512i
7646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7647_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7648{
7649 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7650 (__v16si)
7651 _mm512_setzero_si512 (),
7652 (__mmask16) __U);
7653}
7654
7655extern __inline __m512i
7656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7657_mm512_ror_epi32 (__m512i __A, int __B)
7658{
7659 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7660 (__v16si)
4271e5cb 7661 _mm512_undefined_epi32 (),
756c5857
AI
7662 (__mmask16) -1);
7663}
7664
7665extern __inline __m512i
7666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7667_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7668{
7669 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7670 (__v16si) __W,
7671 (__mmask16) __U);
7672}
7673
7674extern __inline __m512i
7675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7677{
7678 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7679 (__v16si)
7680 _mm512_setzero_si512 (),
7681 (__mmask16) __U);
7682}
7683
7684extern __inline __m512i
7685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686_mm512_rol_epi64 (__m512i __A, const int __B)
7687{
7688 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7689 (__v8di)
4271e5cb 7690 _mm512_undefined_epi32 (),
756c5857
AI
7691 (__mmask8) -1);
7692}
7693
7694extern __inline __m512i
7695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7696_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7697{
7698 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7699 (__v8di) __W,
7700 (__mmask8) __U);
7701}
7702
7703extern __inline __m512i
7704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7705_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7706{
7707 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7708 (__v8di)
7709 _mm512_setzero_si512 (),
7710 (__mmask8) __U);
7711}
7712
7713extern __inline __m512i
7714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7715_mm512_ror_epi64 (__m512i __A, int __B)
7716{
7717 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7718 (__v8di)
4271e5cb 7719 _mm512_undefined_epi32 (),
756c5857
AI
7720 (__mmask8) -1);
7721}
7722
7723extern __inline __m512i
7724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7725_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7726{
7727 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7728 (__v8di) __W,
7729 (__mmask8) __U);
7730}
7731
7732extern __inline __m512i
7733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7734_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7735{
7736 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7737 (__v8di)
7738 _mm512_setzero_si512 (),
7739 (__mmask8) __U);
7740}
7741
7742#else
/* Macro form: expands to __builtin_ia32_prold512_mask on A and (int) B
   with _mm512_undefined_epi32 () as the third operand and
   (__mmask16) (-1) as the mask.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
/* Macro form: expands to __builtin_ia32_prold512_mask on A and (int) B
   with W as the third operand and U as the mask.  */
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
/* Macro form: expands to __builtin_ia32_prold512_mask on A and (int) B
   with _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro form: expands to __builtin_ia32_prord512_mask on A and (int) B
   with _mm512_undefined_epi32 () as the third operand and
   (__mmask16) (-1) as the mask.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
/* Macro form: expands to __builtin_ia32_prord512_mask on A and (int) B
   with W as the third operand and U as the mask.  */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
/* Macro form: expands to __builtin_ia32_prord512_mask on A and (int) B
   with _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro form: expands to __builtin_ia32_prolq512_mask on A and (int) B
   with _mm512_undefined_epi32 () as the third operand and
   (__mmask8) (-1) as the mask.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
/* Macro form: expands to __builtin_ia32_prolq512_mask on A and (int) B
   with W as the third operand and U as the mask.  */
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
/* Macro form: expands to __builtin_ia32_prolq512_mask on A and (int) B
   with _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7788
/* Macro form: expands to __builtin_ia32_prorq512_mask on A and (int) B
   with _mm512_undefined_epi32 () as the third operand and
   (__mmask8) (-1) as the mask.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
/* Macro form: expands to __builtin_ia32_prorq512_mask on A and (int) B
   with W as the third operand and U as the mask.  */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
/* Macro form: expands to __builtin_ia32_prorq512_mask on A and (int) B
   with _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
7804#endif
7805
7806extern __inline __m512i
7807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7808_mm512_and_si512 (__m512i __A, __m512i __B)
7809{
2069d6fc 7810 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7811}
7812
7813extern __inline __m512i
7814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7815_mm512_and_epi32 (__m512i __A, __m512i __B)
7816{
2069d6fc 7817 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7818}
7819
7820extern __inline __m512i
7821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7822_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7823{
7824 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7825 (__v16si) __B,
7826 (__v16si) __W,
7827 (__mmask16) __U);
7828}
7829
7830extern __inline __m512i
7831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7832_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7833{
7834 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7835 (__v16si) __B,
7836 (__v16si)
7837 _mm512_setzero_si512 (),
7838 (__mmask16) __U);
7839}
7840
7841extern __inline __m512i
7842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7843_mm512_and_epi64 (__m512i __A, __m512i __B)
7844{
2069d6fc 7845 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7846}
7847
7848extern __inline __m512i
7849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7850_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7851{
7852 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7853 (__v8di) __B,
7854 (__v8di) __W, __U);
7855}
7856
7857extern __inline __m512i
7858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7859_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7860{
7861 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7862 (__v8di) __B,
7863 (__v8di)
7864 _mm512_setzero_pd (),
7865 __U);
7866}
7867
7868extern __inline __m512i
7869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870_mm512_andnot_si512 (__m512i __A, __m512i __B)
7871{
7872 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7873 (__v16si) __B,
7874 (__v16si)
4271e5cb 7875 _mm512_undefined_epi32 (),
756c5857
AI
7876 (__mmask16) -1);
7877}
7878
7879extern __inline __m512i
7880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7881_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7882{
7883 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7884 (__v16si) __B,
7885 (__v16si)
4271e5cb 7886 _mm512_undefined_epi32 (),
756c5857
AI
7887 (__mmask16) -1);
7888}
7889
7890extern __inline __m512i
7891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7892_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7893{
7894 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7895 (__v16si) __B,
7896 (__v16si) __W,
7897 (__mmask16) __U);
7898}
7899
7900extern __inline __m512i
7901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7902_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7903{
7904 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7905 (__v16si) __B,
7906 (__v16si)
7907 _mm512_setzero_si512 (),
7908 (__mmask16) __U);
7909}
7910
7911extern __inline __m512i
7912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7913_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7914{
7915 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7916 (__v8di) __B,
7917 (__v8di)
4271e5cb 7918 _mm512_undefined_epi32 (),
756c5857
AI
7919 (__mmask8) -1);
7920}
7921
7922extern __inline __m512i
7923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7924_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7925{
7926 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7927 (__v8di) __B,
7928 (__v8di) __W, __U);
7929}
7930
7931extern __inline __m512i
7932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7933_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7934{
7935 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7936 (__v8di) __B,
7937 (__v8di)
7938 _mm512_setzero_pd (),
7939 __U);
7940}
7941
7942extern __inline __mmask16
7943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7944_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7945{
7946 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7947 (__v16si) __B,
7948 (__mmask16) -1);
7949}
7950
7951extern __inline __mmask16
7952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7954{
7955 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7956 (__v16si) __B, __U);
7957}
7958
7959extern __inline __mmask8
7960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7961_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7962{
7963 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7964 (__v8di) __B,
7965 (__mmask8) -1);
7966}
7967
7968extern __inline __mmask8
7969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7970_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7971{
7972 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7973}
7974
260d3642
IT
7975extern __inline __mmask16
7976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7977_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7978{
7979 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7980 (__v16si) __B,
7981 (__mmask16) -1);
7982}
7983
7984extern __inline __mmask16
7985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7986_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7987{
7988 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7989 (__v16si) __B, __U);
7990}
7991
7992extern __inline __mmask8
7993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7994_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7995{
7996 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7997 (__v8di) __B,
7998 (__mmask8) -1);
7999}
8000
8001extern __inline __mmask8
8002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8003_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8004{
8005 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
8006 (__v8di) __B, __U);
8007}
8008
dcb2c527
JJ
8009extern __inline __m512
8010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8011_mm512_abs_ps (__m512 __A)
8012{
8013 return (__m512) _mm512_and_epi32 ((__m512i) __A,
8014 _mm512_set1_epi32 (0x7fffffff));
8015}
8016
8017extern __inline __m512
8018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
8020{
8021 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
8022 _mm512_set1_epi32 (0x7fffffff));
8023}
8024
8025extern __inline __m512d
8026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 8027_mm512_abs_pd (__m512d __A)
dcb2c527
JJ
8028{
8029 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
8030 _mm512_set1_epi64 (0x7fffffffffffffffLL));
8031}
8032
8033extern __inline __m512d
8034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 8035_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
dcb2c527
JJ
8036{
8037 return (__m512d)
8038 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
8039 _mm512_set1_epi64 (0x7fffffffffffffffLL));
8040}
8041
756c5857
AI
8042extern __inline __m512i
8043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8044_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
8045{
8046 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
8047 (__v16si) __B,
8048 (__v16si)
4271e5cb 8049 _mm512_undefined_epi32 (),
756c5857
AI
8050 (__mmask16) -1);
8051}
8052
8053extern __inline __m512i
8054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8055_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8056 __m512i __B)
8057{
8058 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
8059 (__v16si) __B,
8060 (__v16si) __W,
8061 (__mmask16) __U);
8062}
8063
8064extern __inline __m512i
8065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8066_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
8067{
8068 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
8069 (__v16si) __B,
8070 (__v16si)
8071 _mm512_setzero_si512 (),
8072 (__mmask16) __U);
8073}
8074
8075extern __inline __m512i
8076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8077_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
8078{
8079 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8080 (__v8di) __B,
8081 (__v8di)
4271e5cb 8082 _mm512_undefined_epi32 (),
756c5857
AI
8083 (__mmask8) -1);
8084}
8085
8086extern __inline __m512i
8087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8089{
8090 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8091 (__v8di) __B,
8092 (__v8di) __W,
8093 (__mmask8) __U);
8094}
8095
8096extern __inline __m512i
8097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8098_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8099{
8100 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
8101 (__v8di) __B,
8102 (__v8di)
8103 _mm512_setzero_si512 (),
8104 (__mmask8) __U);
8105}
8106
8107extern __inline __m512i
8108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8109_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
8110{
8111 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8112 (__v16si) __B,
8113 (__v16si)
4271e5cb 8114 _mm512_undefined_epi32 (),
756c5857
AI
8115 (__mmask16) -1);
8116}
8117
8118extern __inline __m512i
8119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8120_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8121 __m512i __B)
8122{
8123 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8124 (__v16si) __B,
8125 (__v16si) __W,
8126 (__mmask16) __U);
8127}
8128
8129extern __inline __m512i
8130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8131_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
8132{
8133 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8134 (__v16si) __B,
8135 (__v16si)
8136 _mm512_setzero_si512 (),
8137 (__mmask16) __U);
8138}
8139
8140extern __inline __m512i
8141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8142_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
8143{
8144 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8145 (__v8di) __B,
8146 (__v8di)
4271e5cb 8147 _mm512_undefined_epi32 (),
756c5857
AI
8148 (__mmask8) -1);
8149}
8150
8151extern __inline __m512i
8152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8153_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8154{
8155 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8156 (__v8di) __B,
8157 (__v8di) __W,
8158 (__mmask8) __U);
8159}
8160
8161extern __inline __m512i
8162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8163_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8164{
8165 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8166 (__v8di) __B,
8167 (__v8di)
8168 _mm512_setzero_si512 (),
8169 (__mmask8) __U);
8170}
8171
8172#ifdef __x86_64__
8173#ifdef __OPTIMIZE__
8174extern __inline unsigned long long
8175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8176_mm_cvt_roundss_u64 (__m128 __A, const int __R)
8177{
8178 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
8179}
8180
8181extern __inline long long
8182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8183_mm_cvt_roundss_si64 (__m128 __A, const int __R)
8184{
8185 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
8186}
8187
8188extern __inline long long
8189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190_mm_cvt_roundss_i64 (__m128 __A, const int __R)
8191{
8192 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
8193}
8194
8195extern __inline unsigned long long
8196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8197_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
8198{
8199 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
8200}
8201
8202extern __inline long long
8203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8204_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
8205{
8206 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
8207}
8208
8209extern __inline long long
8210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
8212{
8213 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
8214}
8215#else
/* Macro form: expands to __builtin_ia32_vcvtss2usi64 (A, B) cast to
   unsigned long long.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 ((A), (B)))
8218
/* Macro form: expands to __builtin_ia32_vcvtss2si64 (A, B) cast to long
   long.  */
#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))
8221
/* Macro form: expands to __builtin_ia32_vcvtss2si64 (A, B) cast to long
   long.  */
#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))
8224
/* Macro form: expands to __builtin_ia32_vcvttss2usi64 (A, B) cast to
   unsigned long long.  */
#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 ((A), (B)))
8227
/* Macro form: expands to __builtin_ia32_vcvttss2si64 (A, B) cast to long
   long.  */
#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))
8230
/* Macro form: expands to __builtin_ia32_vcvttss2si64 (A, B) cast to long
   long.  */
#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))
8233#endif
8234#endif
8235
8236#ifdef __OPTIMIZE__
8237extern __inline unsigned
8238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8239_mm_cvt_roundss_u32 (__m128 __A, const int __R)
8240{
8241 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
8242}
8243
8244extern __inline int
8245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8246_mm_cvt_roundss_si32 (__m128 __A, const int __R)
8247{
8248 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
8249}
8250
8251extern __inline int
8252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8253_mm_cvt_roundss_i32 (__m128 __A, const int __R)
8254{
8255 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
8256}
8257
8258extern __inline unsigned
8259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8260_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
8261{
8262 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
8263}
8264
8265extern __inline int
8266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8267_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
8268{
8269 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
8270}
8271
8272extern __inline int
8273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8274_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
8275{
8276 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
8277}
8278#else
/* Macro form: expands to __builtin_ia32_vcvtss2usi32 (A, B) cast to
   unsigned.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))
8281
/* Macro form: expands to __builtin_ia32_vcvtss2si32 (A, B) cast to int.  */
#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))
8284
/* Macro form: expands to __builtin_ia32_vcvtss2si32 (A, B) cast to int.  */
#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))
8287
/* Macro form: expands to __builtin_ia32_vcvttss2usi32 (A, B) cast to
   unsigned.  */
#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))
8290
/* Macro form: expands to __builtin_ia32_vcvttss2si32 (A, B) cast to int.  */
#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
8293
/* Macro form: expands to __builtin_ia32_vcvttss2si32 (A, B) cast to int.  */
#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
8296#endif
8297
8298#ifdef __x86_64__
8299#ifdef __OPTIMIZE__
8300extern __inline unsigned long long
8301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8302_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
8303{
8304 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
8305}
8306
8307extern __inline long long
8308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8309_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
8310{
8311 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8312}
8313
8314extern __inline long long
8315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8316_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
8317{
8318 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8319}
8320
8321extern __inline unsigned long long
8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
8324{
8325 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
8326}
8327
8328extern __inline long long
8329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8330_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
8331{
8332 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8333}
8334
8335extern __inline long long
8336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8337_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
8338{
8339 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8340}
8341#else
/* Macro form: expands to __builtin_ia32_vcvtsd2usi64 (A, B) cast to
   unsigned long long.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))
8344
/* Macro form: expands to __builtin_ia32_vcvtsd2si64 (A, B) cast to long
   long.  */
#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))
8347
/* Macro form: expands to __builtin_ia32_vcvtsd2si64 (A, B) cast to long
   long.  */
#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))
8350
/* Macro form: expands to __builtin_ia32_vcvttsd2usi64 (A, B) cast to
   unsigned long long.  */
#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))
8353
/* Macro form: expands to __builtin_ia32_vcvttsd2si64 (A, B) cast to long
   long.  */
#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
8356
/* Macro form: expands to __builtin_ia32_vcvttsd2si64 (A, B) cast to long
   long.  */
#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
8359#endif
8360#endif
8361
8362#ifdef __OPTIMIZE__
8363extern __inline unsigned
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
8366{
8367 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
8368}
8369
8370extern __inline int
8371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8372_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
8373{
8374 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8375}
8376
8377extern __inline int
8378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
8380{
8381 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8382}
8383
8384extern __inline unsigned
8385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
8387{
8388 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
8389}
8390
8391extern __inline int
8392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8393_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
8394{
8395 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8396}
8397
8398extern __inline int
8399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
8401{
8402 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8403}
8404#else
/* Macro form: expands to __builtin_ia32_vcvtsd2usi32 (A, B) cast to
   unsigned.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))
8407
/* Macro form: expands to __builtin_ia32_vcvtsd2si32 (A, B) cast to int.  */
#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))
8410
/* Macro form: expands to __builtin_ia32_vcvtsd2si32 (A, B) cast to int.  */
#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))
8413
/* Macro form: expands to __builtin_ia32_vcvttsd2usi32 (A, B) cast to
   unsigned.  */
#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))
8416
/* Macro form: expands to __builtin_ia32_vcvttsd2si32 (A, B) cast to int.  */
#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
8419
/* Macro form: expands to __builtin_ia32_vcvttsd2si32 (A, B) cast to int.  */
#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
8422#endif
8423
8424extern __inline __m512d
8425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8426_mm512_movedup_pd (__m512d __A)
8427{
8428 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8429 (__v8df)
0b192937 8430 _mm512_undefined_pd (),
756c5857
AI
8431 (__mmask8) -1);
8432}
8433
8434extern __inline __m512d
8435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8436_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8437{
8438 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8439 (__v8df) __W,
8440 (__mmask8) __U);
8441}
8442
8443extern __inline __m512d
8444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8446{
8447 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8448 (__v8df)
8449 _mm512_setzero_pd (),
8450 (__mmask8) __U);
8451}
8452
8453extern __inline __m512d
8454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8455_mm512_unpacklo_pd (__m512d __A, __m512d __B)
8456{
8457 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8458 (__v8df) __B,
8459 (__v8df)
0b192937 8460 _mm512_undefined_pd (),
756c5857
AI
8461 (__mmask8) -1);
8462}
8463
8464extern __inline __m512d
8465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8466_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8467{
8468 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8469 (__v8df) __B,
8470 (__v8df) __W,
8471 (__mmask8) __U);
8472}
8473
8474extern __inline __m512d
8475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8476_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8477{
8478 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8479 (__v8df) __B,
8480 (__v8df)
8481 _mm512_setzero_pd (),
8482 (__mmask8) __U);
8483}
8484
8485extern __inline __m512d
8486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8487_mm512_unpackhi_pd (__m512d __A, __m512d __B)
8488{
8489 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8490 (__v8df) __B,
8491 (__v8df)
0b192937 8492 _mm512_undefined_pd (),
756c5857
AI
8493 (__mmask8) -1);
8494}
8495
8496extern __inline __m512d
8497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8498_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8499{
8500 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8501 (__v8df) __B,
8502 (__v8df) __W,
8503 (__mmask8) __U);
8504}
8505
8506extern __inline __m512d
8507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8508_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8509{
8510 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8511 (__v8df) __B,
8512 (__v8df)
8513 _mm512_setzero_pd (),
8514 (__mmask8) __U);
8515}
8516
8517extern __inline __m512
8518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8519_mm512_unpackhi_ps (__m512 __A, __m512 __B)
8520{
8521 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8522 (__v16sf) __B,
8523 (__v16sf)
0b192937 8524 _mm512_undefined_ps (),
756c5857
AI
8525 (__mmask16) -1);
8526}
8527
8528extern __inline __m512
8529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8530_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8531{
8532 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8533 (__v16sf) __B,
8534 (__v16sf) __W,
8535 (__mmask16) __U);
8536}
8537
8538extern __inline __m512
8539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8541{
8542 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8543 (__v16sf) __B,
8544 (__v16sf)
8545 _mm512_setzero_ps (),
8546 (__mmask16) __U);
8547}
8548
8549#ifdef __OPTIMIZE__
8550extern __inline __m512d
8551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8552_mm512_cvt_roundps_pd (__m256 __A, const int __R)
8553{
8554 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8555 (__v8df)
0b192937 8556 _mm512_undefined_pd (),
756c5857
AI
8557 (__mmask8) -1, __R);
8558}
8559
8560extern __inline __m512d
8561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8562_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8563 const int __R)
8564{
8565 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8566 (__v8df) __W,
8567 (__mmask8) __U, __R);
8568}
8569
8570extern __inline __m512d
8571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8572_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8573{
8574 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8575 (__v8df)
8576 _mm512_setzero_pd (),
8577 (__mmask8) __U, __R);
8578}
8579
8580extern __inline __m512
8581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582_mm512_cvt_roundph_ps (__m256i __A, const int __R)
8583{
8584 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8585 (__v16sf)
0b192937 8586 _mm512_undefined_ps (),
756c5857
AI
8587 (__mmask16) -1, __R);
8588}
8589
8590extern __inline __m512
8591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8593 const int __R)
8594{
8595 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8596 (__v16sf) __W,
8597 (__mmask16) __U, __R);
8598}
8599
8600extern __inline __m512
8601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8602_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8603{
8604 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8605 (__v16sf)
8606 _mm512_setzero_ps (),
8607 (__mmask16) __U, __R);
8608}
8609
8610extern __inline __m256i
8611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8612_mm512_cvt_roundps_ph (__m512 __A, const int __I)
8613{
8614 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8615 __I,
8616 (__v16hi)
0b192937 8617 _mm256_undefined_si256 (),
756c5857
AI
8618 -1);
8619}
8620
8621extern __inline __m256i
8622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8623_mm512_cvtps_ph (__m512 __A, const int __I)
8624{
8625 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8626 __I,
8627 (__v16hi)
0b192937 8628 _mm256_undefined_si256 (),
756c5857
AI
8629 -1);
8630}
8631
8632extern __inline __m256i
8633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8634_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8635 const int __I)
8636{
8637 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8638 __I,
8639 (__v16hi) __U,
8640 (__mmask16) __W);
8641}
8642
8643extern __inline __m256i
8644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8645_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8646{
8647 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8648 __I,
8649 (__v16hi) __U,
8650 (__mmask16) __W);
8651}
8652
8653extern __inline __m256i
8654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8655_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8656{
8657 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8658 __I,
8659 (__v16hi)
8660 _mm256_setzero_si256 (),
8661 (__mmask16) __W);
8662}
8663
8664extern __inline __m256i
8665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8666_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8667{
8668 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8669 __I,
8670 (__v16hi)
8671 _mm256_setzero_si256 (),
8672 (__mmask16) __W);
8673}
8674#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the (__m512d) cast applies to the call result
   even when the invocation is followed by a postfix operator.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_undefined_pd (), -1, (B)))
756c5857
AI
8677
/* Macro form used when __OPTIMIZE__ is not defined; W is the second
   builtin operand and U the mask.  The expansion is fully parenthesized
   so the cast applies to the call result.  */
#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))
8680
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the second builtin operand and U the mask.  The expansion is fully
   parenthesized so the cast applies to the call result.  */
#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_setzero_pd (), (U), (B)))
8683
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the (__m512) cast applies to the call result
   even when the invocation is followed by a postfix operator.  */
#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_undefined_ps (), -1, (B)))
756c5857
AI
8686
/* Macro form used when __OPTIMIZE__ is not defined; W is the second
   builtin operand and U the mask.  The expansion is fully parenthesized
   so the cast applies to the call result.  */
#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) (W), (U), (B)))
8689
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the second builtin operand and U the mask.  The expansion is fully
   parenthesized so the cast applies to the call result.  */
#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_setzero_ps (), (U), (B)))
8692
/* Macro form: __builtin_ia32_vcvtps2ph512_mask on A with immediate I,
   an undefined third operand and a mask of -1.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) _mm256_undefined_si256 (), -1))
/* Macro form: same expansion as _mm512_cvt_roundps_ph.  */
#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) _mm256_undefined_si256 (), -1))
/* Macro form: here U is the vector passed as the third builtin operand
   and W is the mask.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) (__m256i) (U), (__mmask16) (W)))
/* Macro form: same expansion as _mm512_mask_cvt_roundps_ph (U is the
   vector operand, W the mask).  */
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) (__m256i) (U), (__mmask16) (W)))
/* Macro form: an all-zero vector is the third builtin operand and W is
   the mask.  */
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
/* Macro form: same expansion as _mm512_maskz_cvt_roundps_ph.  */
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ( \
      (__v16sf) (__m512) (A), (int) (I), \
      (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
8711#endif
8712
8713#ifdef __OPTIMIZE__
8714extern __inline __m256
8715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8716_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8717{
8718 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8719 (__v8sf)
0b192937 8720 _mm256_undefined_ps (),
756c5857
AI
8721 (__mmask8) -1, __R);
8722}
8723
8724extern __inline __m256
8725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8726_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8727 const int __R)
8728{
8729 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8730 (__v8sf) __W,
8731 (__mmask8) __U, __R);
8732}
8733
8734extern __inline __m256
8735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8737{
8738 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8739 (__v8sf)
8740 _mm256_setzero_ps (),
8741 (__mmask8) __U, __R);
8742}
8743
075691af
AI
8744extern __inline __m128
8745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8746_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8747{
8748 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8749 (__v2df) __B,
8750 __R);
8751}
8752
93103603
SP
8753extern __inline __m128
8754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8755_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
8756 __m128d __B, const int __R)
8757{
8758 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
8759 (__v2df) __B,
8760 (__v4sf) __W,
8761 __U,
8762 __R);
8763}
8764
8765extern __inline __m128
8766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
8768 __m128d __B, const int __R)
8769{
8770 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
8771 (__v2df) __B,
8772 _mm_setzero_ps (),
8773 __U,
8774 __R);
8775}
8776
075691af
AI
8777extern __inline __m128d
8778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8779_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8780{
8781 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8782 (__v4sf) __B,
8783 __R);
8784}
93103603
SP
8785
8786extern __inline __m128d
8787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
8789 __m128 __B, const int __R)
8790{
8791 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
8792 (__v4sf) __B,
8793 (__v2df) __W,
8794 __U,
8795 __R);
8796}
8797
8798extern __inline __m128d
8799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
8801 __m128 __B, const int __R)
8802{
8803 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
8804 (__v4sf) __B,
8805 _mm_setzero_pd (),
8806 __U,
8807 __R);
8808}
756c5857
AI
8809#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the (__m256) cast applies to the call result
   even when the invocation is followed by a postfix operator.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_undefined_ps (), -1, (B)))
756c5857
AI
8812
/* Macro form used when __OPTIMIZE__ is not defined; W is the second
   builtin operand and U the mask.  The expansion is fully parenthesized
   so the cast applies to the call result.  */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))
8815
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the second builtin operand and U the mask.  The expansion is fully
   parenthesized so the cast applies to the call result.  */
#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_setzero_ps (), (U), (B)))
075691af
AI
8818
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the (__m128) cast applies to the call result
   even when the invocation is followed by a postfix operator.  */
#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))
8821
93103603
SP
/* Macro form used when __OPTIMIZE__ is not defined; W is the third
   builtin operand and U the mask.  The expansion is fully parenthesized
   so the cast applies to the call result.  */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C)))
8824
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the third builtin operand and U the mask.  The expansion is fully
   parenthesized so the cast applies to the call result.  */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_mask_round ((A), (B), \
      _mm_setzero_ps (), (U), (C)))
8828
075691af
AI
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   fully parenthesized so the (__m128d) cast applies to the call result
   even when the invocation is followed by a postfix operator.  */
#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
93103603
SP
8831
/* Macro form used when __OPTIMIZE__ is not defined; W is the third
   builtin operand and U the mask.  The expansion is fully parenthesized
   so the cast applies to the call result.  */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C)))
8834
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the third builtin operand and U the mask.  The expansion is fully
   parenthesized so the cast applies to the call result.  */
#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_mask_round ((A), (B), \
      _mm_setzero_pd (), (U), (C)))
8838
756c5857
AI
8839#endif
8840
93103603
SP
/* Shorthand for _mm_mask_cvt_roundss_sd with _MM_FROUND_CUR_DIRECTION
   as the last argument.  Parenthesized as a whole so the result is a
   single primary expression whichever form the callee takes.  */
#define _mm_mask_cvtss_sd(W, U, A, B) \
  (_mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))
8843
/* Shorthand for _mm_maskz_cvt_roundss_sd with _MM_FROUND_CUR_DIRECTION
   as the last argument.  Parenthesized as a whole so the result is a
   single primary expression whichever form the callee takes.  */
#define _mm_maskz_cvtss_sd(U, A, B) \
  (_mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
8846
/* Shorthand for _mm_mask_cvt_roundsd_ss with _MM_FROUND_CUR_DIRECTION
   as the last argument.  Parenthesized as a whole so the result is a
   single primary expression whichever form the callee takes.  */
#define _mm_mask_cvtsd_ss(W, U, A, B) \
  (_mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))
8849
/* Shorthand for _mm_maskz_cvt_roundsd_ss with _MM_FROUND_CUR_DIRECTION
   as the last argument.  Parenthesized as a whole so the result is a
   single primary expression whichever form the callee takes.  */
#define _mm_maskz_cvtsd_ss(U, A, B) \
  (_mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
8852
756c5857
AI
8853extern __inline void
8854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855_mm512_stream_si512 (__m512i * __P, __m512i __A)
8856{
8857 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8858}
8859
8860extern __inline void
8861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8862_mm512_stream_ps (float *__P, __m512 __A)
8863{
8864 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8865}
8866
8867extern __inline void
8868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8869_mm512_stream_pd (double *__P, __m512d __A)
8870{
8871 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8872}
8873
c56a42b9
KY
8874extern __inline __m512i
8875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8876_mm512_stream_load_si512 (void *__P)
8877{
8878 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8879}
8880
9c3c2608
UB
/* Constants for mantissa extraction.  The numeric values are written
   out because the getmant wrappers in this file pack them into a single
   integer as (sign << 2) | norm.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8889
/* Sign-control selectors for mantissa extraction.  The numeric values
   are written out because the getmant wrappers in this file shift this
   value left by two and OR it with the normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8896
756c5857 8897#ifdef __OPTIMIZE__
075691af
AI
8898extern __inline __m128
8899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8900_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8901{
8902 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8903 (__v4sf) __B,
8904 __R);
8905}
8906
68d872d7
SP
8907extern __inline __m128
8908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8909_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8910 __m128 __B, const int __R)
8911{
8912 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8913 (__v4sf) __B,
8914 (__v4sf) __W,
8915 (__mmask8) __U, __R);
8916}
8917
8918extern __inline __m128
8919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8920_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8921 const int __R)
8922{
8923 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8924 (__v4sf) __B,
8925 (__v4sf)
8926 _mm_setzero_ps (),
8927 (__mmask8) __U, __R);
8928}
8929
075691af
AI
8930extern __inline __m128d
8931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8933{
8934 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8935 (__v2df) __B,
8936 __R);
8937}
8938
68d872d7
SP
8939extern __inline __m128d
8940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8941_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8942 __m128d __B, const int __R)
8943{
8944 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8945 (__v2df) __B,
8946 (__v2df) __W,
8947 (__mmask8) __U, __R);
8948}
8949
8950extern __inline __m128d
8951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8952_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8953 const int __R)
8954{
8955 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8956 (__v2df) __B,
8957 (__v2df)
8958 _mm_setzero_pd (),
8959 (__mmask8) __U, __R);
8960}
8961
756c5857
AI
8962extern __inline __m512
8963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8964_mm512_getexp_round_ps (__m512 __A, const int __R)
8965{
8966 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8967 (__v16sf)
0b192937 8968 _mm512_undefined_ps (),
756c5857
AI
8969 (__mmask16) -1, __R);
8970}
8971
8972extern __inline __m512
8973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8975 const int __R)
8976{
8977 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8978 (__v16sf) __W,
8979 (__mmask16) __U, __R);
8980}
8981
8982extern __inline __m512
8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8985{
8986 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8987 (__v16sf)
8988 _mm512_setzero_ps (),
8989 (__mmask16) __U, __R);
8990}
8991
8992extern __inline __m512d
8993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994_mm512_getexp_round_pd (__m512d __A, const int __R)
8995{
8996 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8997 (__v8df)
0b192937 8998 _mm512_undefined_pd (),
756c5857
AI
8999 (__mmask8) -1, __R);
9000}
9001
9002extern __inline __m512d
9003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9004_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
9005 const int __R)
9006{
9007 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
9008 (__v8df) __W,
9009 (__mmask8) __U, __R);
9010}
9011
9012extern __inline __m512d
9013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9014_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
9015{
9016 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
9017 (__v8df)
9018 _mm512_setzero_pd (),
9019 (__mmask8) __U, __R);
9020}
9021
756c5857
AI
9022extern __inline __m512d
9023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9024_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
9025 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9026{
9027 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
9028 (__C << 2) | __B,
0b192937 9029 _mm512_undefined_pd (),
756c5857
AI
9030 (__mmask8) -1, __R);
9031}
9032
9033extern __inline __m512d
9034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9035_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
9036 _MM_MANTISSA_NORM_ENUM __B,
9037 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9038{
9039 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
9040 (__C << 2) | __B,
9041 (__v8df) __W, __U,
9042 __R);
9043}
9044
9045extern __inline __m512d
9046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9047_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
9048 _MM_MANTISSA_NORM_ENUM __B,
9049 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9050{
9051 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
9052 (__C << 2) | __B,
9053 (__v8df)
9054 _mm512_setzero_pd (),
9055 __U, __R);
9056}
9057
9058extern __inline __m512
9059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
9061 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9062{
9063 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9064 (__C << 2) | __B,
0b192937 9065 _mm512_undefined_ps (),
756c5857
AI
9066 (__mmask16) -1, __R);
9067}
9068
9069extern __inline __m512
9070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9071_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
9072 _MM_MANTISSA_NORM_ENUM __B,
9073 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9074{
9075 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9076 (__C << 2) | __B,
9077 (__v16sf) __W, __U,
9078 __R);
9079}
9080
9081extern __inline __m512
9082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9083_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
9084 _MM_MANTISSA_NORM_ENUM __B,
9085 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
9086{
9087 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
9088 (__C << 2) | __B,
9089 (__v16sf)
9090 _mm512_setzero_ps (),
9091 __U, __R);
9092}
9093
075691af
AI
9094extern __inline __m128d
9095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9096_mm_getmant_round_sd (__m128d __A, __m128d __B,
9097 _MM_MANTISSA_NORM_ENUM __C,
9098 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9099{
9100 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
9101 (__v2df) __B,
9102 (__D << 2) | __C,
9103 __R);
9104}
9105
68d872d7
SP
9106extern __inline __m128d
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
9109 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
9110 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9111{
9112 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
9113 (__v2df) __B,
9114 (__D << 2) | __C,
9115 (__v2df) __W,
9116 __U, __R);
9117}
9118
9119extern __inline __m128d
9120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
9122 _MM_MANTISSA_NORM_ENUM __C,
9123 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9124{
9125 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
9126 (__v2df) __B,
9127 (__D << 2) | __C,
9128 (__v2df)
9129 _mm_setzero_pd(),
9130 __U, __R);
9131}
9132
075691af
AI
9133extern __inline __m128
9134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9135_mm_getmant_round_ss (__m128 __A, __m128 __B,
9136 _MM_MANTISSA_NORM_ENUM __C,
9137 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9138{
9139 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
9140 (__v4sf) __B,
9141 (__D << 2) | __C,
9142 __R);
9143}
9144
68d872d7
SP
9145extern __inline __m128
9146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9147_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
9148 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
9149 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9150{
9151 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
9152 (__v4sf) __B,
9153 (__D << 2) | __C,
9154 (__v4sf) __W,
9155 __U, __R);
9156}
9157
9158extern __inline __m128
9159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9160_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
9161 _MM_MANTISSA_NORM_ENUM __C,
9162 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
9163{
9164 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
9165 (__v4sf) __B,
9166 (__D << 2) | __C,
9167 (__v4sf)
9168 _mm_setzero_ps(),
9169 __U, __R);
9170}
9171
756c5857
AI
9172#else
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantpd512_mask with an undefined third operand and
   an all-ones mask.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_undefined_pd (), \
      (__mmask8) -1, (R)))
9179
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantpd512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) (W), (__mmask8) (U), (R)))
9186
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantpd512_mask with an all-zero third operand and U
   as the mask.  */
#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_setzero_pd (), \
      (__mmask8) (U), (R)))
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantps512_mask with an undefined third operand and
   an all-ones mask.  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_undefined_ps (), \
      (__mmask16) -1, (R)))
9199
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantps512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) (W), (__mmask16) (U), (R)))
9206
/* Macro form: packs the selectors as ((C) << 2) | (B) and calls
   __builtin_ia32_getmantps512_mask with an all-zero third operand and U
   as the mask.  */
#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), (R)))
075691af
AI
/* Macro form: calls __builtin_ia32_getmantsd_round on X and Y with the
   packed immediate ((D) << 2) | (C) and R.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
      (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), (R)))
9218
68d872d7
SP
/* Macro form: calls __builtin_ia32_getmantsd_mask_round on X and Y with
   the packed immediate ((D) << 2) | (C), W as the fourth operand and U
   as the mask.  */
#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
      (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), \
      (__v2df) (__m128d) (W), (__mmask8) (U), (R)))
9226
/* Macro form: calls __builtin_ia32_getmantsd_mask_round on X and Y with
   the packed immediate ((D) << 2) | (C), an all-zero fourth operand and
   U as the mask.  */
#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
      (__v2df) (__m128d) (X), (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), \
      (__v2df) (__m128d) _mm_setzero_pd (), (__mmask8) (U), (R)))
9234
075691af
AI
/* Macro form: calls __builtin_ia32_getmantss_round on X and Y with the
   packed immediate ((D) << 2) | (C) and R.  */
#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ( \
      (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), (R)))
9240
68d872d7
SP
/* Macro form: calls __builtin_ia32_getmantss_mask_round on X and Y with
   the packed immediate ((D) << 2) | (C), W as the fourth operand and U
   as the mask.  */
#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
      (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), \
      (__v4sf) (__m128) (W), (__mmask8) (U), (R)))
9248
/* Macro form: calls __builtin_ia32_getmantss_mask_round on X and Y with
   the packed immediate ((D) << 2) | (C), an all-zero fourth operand and
   U as the mask.  */
#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
      (__v4sf) (__m128) (X), (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), \
      (__v4sf) (__m128) _mm_setzero_ps (), (__mmask8) (U), (R)))
9256
075691af
AI
/* Macro form: calls __builtin_ia32_getexpss128_round on A and B (both
   cast to __v4sf) and R.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), (R)))
9259
68d872d7
SP
/* Macro form used when __OPTIMIZE__ is not defined; W is the third
   builtin operand and U the mask.  The expansion is fully parenthesized,
   like the unmasked _mm_getexp_round_ss macro, so the cast applies to
   the call result.  */
#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((A), (B), (W), (U), (C)))
9262
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the third builtin operand and U the mask.  The expansion is fully
   parenthesized, like the unmasked _mm_getexp_round_ss macro, so the
   cast applies to the call result.  */
#define _mm_maskz_getexp_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))
9265
075691af
AI
/* Macro form: calls __builtin_ia32_getexpsd128_round on A and B (both
   cast to __v2df) and R.  */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), (R)))
9268
68d872d7
SP
/* Macro form used when __OPTIMIZE__ is not defined; W is the third
   builtin operand and U the mask.  The expansion is fully parenthesized,
   like the unmasked _mm_getexp_round_sd macro, so the cast applies to
   the call result.  */
#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((A), (B), (W), (U), (C)))
9271
/* Macro form used when __OPTIMIZE__ is not defined; an all-zero vector
   is the third builtin operand and U the mask.  The expansion is fully
   parenthesized, like the unmasked _mm_getexp_round_sd macro, so the
   cast applies to the call result.  */
#define _mm_maskz_getexp_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))
9274
9275
756c5857
AI
/* Macro form: calls __builtin_ia32_getexpps512_mask on A with an
   undefined second operand and an all-ones mask.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (R)))
756c5857
AI
9279
/* Macro form: calls __builtin_ia32_getexpps512_mask on A with W as the
   second operand and U as the mask.  */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), (__mmask16) (U), (R)))
9283
/* Macro form: calls __builtin_ia32_getexpps512_mask on A with an
   all-zero second operand and U as the mask.  */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))
9287
/* Macro form: calls __builtin_ia32_getexppd512_mask on A with an
   undefined second operand and an all-ones mask.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (R)))
756c5857
AI
9291
/* Macro form: calls __builtin_ia32_getexppd512_mask on A with W as the
   second operand and U as the mask.  */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), (__mmask8) (U), (R)))
9295
/* Macro form: calls __builtin_ia32_getexppd512_mask on A with an
   all-zero second operand and U as the mask.  */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
9299#endif
9300
9301#ifdef __OPTIMIZE__
9302extern __inline __m512
9303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9304_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
9305{
9306 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
9307 (__v16sf)
9308 _mm512_undefined_ps (),
9309 -1, __R);
756c5857
AI
9310}
9311
9312extern __inline __m512
9313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9314_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
9315 const int __imm, const int __R)
9316{
9317 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
9318 (__v16sf) __A,
9319 (__mmask16) __B, __R);
9320}
9321
9322extern __inline __m512
9323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9324_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
9325 const int __imm, const int __R)
9326{
9327 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
9328 __imm,
9329 (__v16sf)
9330 _mm512_setzero_ps (),
9331 (__mmask16) __A, __R);
9332}
9333
9334extern __inline __m512d
9335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9336_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
9337{
9338 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
9339 (__v8df)
9340 _mm512_undefined_pd (),
9341 -1, __R);
756c5857
AI
9342}
9343
9344extern __inline __m512d
9345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
9347 __m512d __C, const int __imm, const int __R)
9348{
9349 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
9350 (__v8df) __A,
9351 (__mmask8) __B, __R);
9352}
9353
9354extern __inline __m512d
9355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
9357 const int __imm, const int __R)
9358{
9359 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
9360 __imm,
9361 (__v8df)
9362 _mm512_setzero_pd (),
9363 (__mmask8) __A, __R);
9364}
075691af
AI
9365
9366extern __inline __m128
9367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
a7c4d6d1
HL
9368_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
9369 const int __R)
9370{
9371 return (__m128)
9372 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
9373 (__v4sf) __B, __imm,
9374 (__v4sf)
9375 _mm_setzero_ps (),
9376 (__mmask8) -1,
9377 __R);
9378}
9379
9380extern __inline __m128
9381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9382_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
9383 __m128 __D, const int __imm, const int __R)
075691af 9384{
a7c4d6d1
HL
9385 return (__m128)
9386 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
9387 (__v4sf) __D, __imm,
9388 (__v4sf) __A,
9389 (__mmask8) __B,
9390 __R);
9391}
9392
9393extern __inline __m128
9394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9395_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
9396 const int __imm, const int __R)
9397{
9398 return (__m128)
9399 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
9400 (__v4sf) __C, __imm,
9401 (__v4sf)
9402 _mm_setzero_ps (),
9403 (__mmask8) __A,
9404 __R);
075691af
AI
9405}
9406
9407extern __inline __m128d
9408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9410 const int __R)
9411{
a7c4d6d1
HL
9412 return (__m128d)
9413 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
9414 (__v2df) __B, __imm,
9415 (__v2df)
9416 _mm_setzero_pd (),
9417 (__mmask8) -1,
9418 __R);
9419}
9420
9421extern __inline __m128d
9422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
9424 __m128d __D, const int __imm, const int __R)
9425{
9426 return (__m128d)
9427 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
9428 (__v2df) __D, __imm,
9429 (__v2df) __A,
9430 (__mmask8) __B,
9431 __R);
9432}
9433
9434extern __inline __m128d
9435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
9437 const int __imm, const int __R)
9438{
9439 return (__m128d)
9440 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
9441 (__v2df) __C, __imm,
9442 (__v2df)
9443 _mm_setzero_pd (),
9444 (__mmask8) __A,
9445 __R);
075691af
AI
9446}
9447
756c5857
AI
9448#else
/* Macro forms of the 512-bit roundscale intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to a direct builtin call with the same
   argument order as the corresponding inline function.  */

/* A: operand, B: immediate, R: rounding argument.  */
#define _mm512_roundscale_round_ps(A, B, R)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),       \
                                            (int)(B),                   \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)(-1),            \
                                            R))

/* A: third builtin operand, B: mask, C: operand, D: immediate.  */
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)                  \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),       \
                                            (int)(D),                   \
                                            (__v16sf)(__m512)(A),       \
                                            (__mmask16)(B),             \
                                            R))

/* A: mask, B: operand, C: immediate; third builtin operand is zero.  */
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)                    \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),       \
                                            (int)(C),                   \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(A),             \
                                            R))

/* A: operand, B: immediate, R: rounding argument.  */
#define _mm512_roundscale_round_pd(A, B, R)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),      \
                                             (int)(B),                  \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)(-1),            \
                                             R))

/* A: third builtin operand, B: mask, C: operand, D: immediate.  */
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)                  \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),      \
                                             (int)(D),                  \
                                             (__v8df)(__m512d)(A),      \
                                             (__mmask8)(B),             \
                                             R))

/* A: mask, B: operand, C: immediate; third builtin operand is zero.  */
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)                    \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),      \
                                             (int)(C),                  \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(A),             \
                                             R))
a7c4d6d1
HL
/* Macro forms of the scalar roundscale intrinsics, used when __OPTIMIZE__
   is not defined.  Argument letters: A/B are the two vector operands, I the
   immediate, R the rounding argument, U the mask.  In the _mask_ variants
   the first macro argument becomes the builtin's fourth operand.  */

#define _mm_roundscale_round_ss(A, B, I, R)                             \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I),            \
                                                  (__v4sf) _mm_setzero_ps (), \
                                                  (__mmask8) (-1),      \
                                                  (int) (R)))

#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R)                  \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
                                                  (__v4sf) (__m128) (C), \
                                                  (int) (I),            \
                                                  (__v4sf) (__m128) (A), \
                                                  (__mmask8) (U),       \
                                                  (int) (R)))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R)                    \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I),            \
                                                  (__v4sf) _mm_setzero_ps (), \
                                                  (__mmask8) (U),       \
                                                  (int) (R)))

#define _mm_roundscale_round_sd(A, B, I, R)                             \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I),           \
                                                   (__v2df) _mm_setzero_pd (), \
                                                   (__mmask8) (-1),     \
                                                   (int) (R)))

#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R)                  \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
                                                   (__v2df) (__m128d) (C), \
                                                   (int) (I),           \
                                                   (__v2df) (__m128d) (A), \
                                                   (__mmask8) (U),      \
                                                   (int) (R)))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R)                    \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I),           \
                                                   (__v2df) _mm_setzero_pd (), \
                                                   (__mmask8) (U),      \
                                                   (int) (R)))
756c5857
AI
9523#endif
9524
9525extern __inline __m512
9526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9527_mm512_floor_ps (__m512 __A)
9528{
9529 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9530 _MM_FROUND_FLOOR,
9531 (__v16sf) __A, -1,
9532 _MM_FROUND_CUR_DIRECTION);
9533}
9534
9535extern __inline __m512d
9536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537_mm512_floor_pd (__m512d __A)
9538{
9539 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9540 _MM_FROUND_FLOOR,
9541 (__v8df) __A, -1,
9542 _MM_FROUND_CUR_DIRECTION);
9543}
9544
9545extern __inline __m512
9546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9547_mm512_ceil_ps (__m512 __A)
9548{
9549 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9550 _MM_FROUND_CEIL,
9551 (__v16sf) __A, -1,
9552 _MM_FROUND_CUR_DIRECTION);
9553}
9554
9555extern __inline __m512d
9556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9557_mm512_ceil_pd (__m512d __A)
9558{
9559 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9560 _MM_FROUND_CEIL,
9561 (__v8df) __A, -1,
9562 _MM_FROUND_CUR_DIRECTION);
9563}
9564
9565extern __inline __m512
9566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9567_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9568{
9569 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9570 _MM_FROUND_FLOOR,
9571 (__v16sf) __W, __U,
9572 _MM_FROUND_CUR_DIRECTION);
9573}
9574
9575extern __inline __m512d
9576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9577_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9578{
9579 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9580 _MM_FROUND_FLOOR,
9581 (__v8df) __W, __U,
9582 _MM_FROUND_CUR_DIRECTION);
9583}
9584
9585extern __inline __m512
9586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9587_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9588{
9589 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9590 _MM_FROUND_CEIL,
9591 (__v16sf) __W, __U,
9592 _MM_FROUND_CUR_DIRECTION);
9593}
9594
9595extern __inline __m512d
9596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9597_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9598{
9599 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9600 _MM_FROUND_CEIL,
9601 (__v8df) __W, __U,
9602 _MM_FROUND_CUR_DIRECTION);
9603}
9604
756c5857 9605#ifdef __OPTIMIZE__
756c5857
AI
9606extern __inline __m512i
9607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9608_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9609{
9610 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9611 (__v16si) __B, __imm,
9612 (__v16si)
4271e5cb 9613 _mm512_undefined_epi32 (),
756c5857
AI
9614 (__mmask16) -1);
9615}
9616
9617extern __inline __m512i
9618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9619_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9620 __m512i __B, const int __imm)
9621{
9622 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9623 (__v16si) __B, __imm,
9624 (__v16si) __W,
9625 (__mmask16) __U);
9626}
9627
9628extern __inline __m512i
9629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9630_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9631 const int __imm)
9632{
9633 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9634 (__v16si) __B, __imm,
9635 (__v16si)
9636 _mm512_setzero_si512 (),
9637 (__mmask16) __U);
9638}
9639
9640extern __inline __m512i
9641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9642_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9643{
9644 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9645 (__v8di) __B, __imm,
9646 (__v8di)
4271e5cb 9647 _mm512_undefined_epi32 (),
756c5857
AI
9648 (__mmask8) -1);
9649}
9650
9651extern __inline __m512i
9652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9653_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9654 __m512i __B, const int __imm)
9655{
9656 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9657 (__v8di) __B, __imm,
9658 (__v8di) __W,
9659 (__mmask8) __U);
9660}
9661
9662extern __inline __m512i
9663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9664_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9665 const int __imm)
9666{
9667 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9668 (__v8di) __B, __imm,
9669 (__v8di)
9670 _mm512_setzero_si512 (),
9671 (__mmask8) __U);
9672}
9673#else
756c5857
AI
/* Macro forms of the alignr intrinsics, used when __OPTIMIZE__ is not
   defined.  X and Y are the two vector operands, C the immediate, W the
   vector passed as the builtin's fourth argument, U the mask.  */

#define _mm512_alignr_epi32(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
                                           (__v16si)(__m512i)(Y),       \
                                           (int)(C),                    \
                                           (__v16si)_mm512_undefined_epi32 (), \
                                           (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
                                           (__v16si)(__m512i)(Y),       \
                                           (int)(C),                    \
                                           (__v16si)(__m512i)(W),       \
                                           (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
                                           (__v16si)(__m512i)(Y),       \
                                           (int)(C),                    \
                                           (__v16si)_mm512_setzero_si512 (), \
                                           (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y),        \
                                           (int)(C),                    \
                                           (__v8di)_mm512_undefined_epi32 (), \
                                           (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y),        \
                                           (int)(C),                    \
                                           (__v8di)(__m512i)(W),        \
                                           (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y),        \
                                           (int)(C),                    \
                                           (__v8di)_mm512_setzero_si512 (), \
                                           (__mmask8)(U)))
9702#endif
9703
9704extern __inline __mmask16
9705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9707{
9708 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9709 (__v16si) __B,
9710 (__mmask16) -1);
9711}
9712
9713extern __inline __mmask16
9714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9716{
9717 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9718 (__v16si) __B, __U);
9719}
9720
9721extern __inline __mmask8
9722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9723_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9724{
9725 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9726 (__v8di) __B, __U);
9727}
9728
9729extern __inline __mmask8
9730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9731_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9732{
9733 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9734 (__v8di) __B,
9735 (__mmask8) -1);
9736}
9737
9738extern __inline __mmask16
9739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9740_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9741{
9742 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9743 (__v16si) __B,
9744 (__mmask16) -1);
9745}
9746
9747extern __inline __mmask16
9748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9750{
9751 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9752 (__v16si) __B, __U);
9753}
9754
9755extern __inline __mmask8
9756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9757_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9758{
9759 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9760 (__v8di) __B, __U);
9761}
9762
9763extern __inline __mmask8
9764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9765_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9766{
9767 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9768 (__v8di) __B,
9769 (__mmask8) -1);
9770}
9771
d256b866
IT
9772extern __inline __mmask16
9773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9775{
9776 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9777 (__v16si) __Y, 5,
9778 (__mmask16) -1);
9779}
9780
275be1da
IT
9781extern __inline __mmask16
9782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9783_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9784{
9785 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9786 (__v16si) __Y, 5,
9787 (__mmask16) __M);
9788}
9789
9790extern __inline __mmask16
9791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9792_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9793{
9794 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9795 (__v16si) __Y, 5,
9796 (__mmask16) __M);
9797}
9798
d256b866
IT
9799extern __inline __mmask16
9800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9801_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9802{
9803 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9804 (__v16si) __Y, 5,
9805 (__mmask16) -1);
9806}
9807
275be1da
IT
9808extern __inline __mmask8
9809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9811{
9812 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9813 (__v8di) __Y, 5,
9814 (__mmask8) __M);
9815}
9816
d256b866
IT
9817extern __inline __mmask8
9818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9819_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9820{
9821 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9822 (__v8di) __Y, 5,
9823 (__mmask8) -1);
9824}
9825
275be1da
IT
9826extern __inline __mmask8
9827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9828_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9829{
9830 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9831 (__v8di) __Y, 5,
9832 (__mmask8) __M);
9833}
9834
d256b866
IT
9835extern __inline __mmask8
9836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9838{
9839 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9840 (__v8di) __Y, 5,
9841 (__mmask8) -1);
9842}
9843
275be1da
IT
9844extern __inline __mmask16
9845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9846_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9847{
9848 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9849 (__v16si) __Y, 2,
9850 (__mmask16) __M);
9851}
9852
d256b866
IT
9853extern __inline __mmask16
9854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9855_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9856{
9857 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9858 (__v16si) __Y, 2,
9859 (__mmask16) -1);
9860}
9861
275be1da
IT
9862extern __inline __mmask16
9863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9864_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9865{
9866 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9867 (__v16si) __Y, 2,
9868 (__mmask16) __M);
9869}
9870
d256b866
IT
9871extern __inline __mmask16
9872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9873_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9874{
9875 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9876 (__v16si) __Y, 2,
9877 (__mmask16) -1);
9878}
9879
275be1da
IT
9880extern __inline __mmask8
9881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9882_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9883{
9884 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9885 (__v8di) __Y, 2,
9886 (__mmask8) __M);
9887}
9888
d256b866
IT
9889extern __inline __mmask8
9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9892{
9893 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9894 (__v8di) __Y, 2,
9895 (__mmask8) -1);
9896}
9897
275be1da
IT
9898extern __inline __mmask8
9899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9900_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9901{
9902 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9903 (__v8di) __Y, 2,
9904 (__mmask8) __M);
9905}
9906
d256b866
IT
9907extern __inline __mmask8
9908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9909_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9910{
9911 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9912 (__v8di) __Y, 2,
9913 (__mmask8) -1);
9914}
9915
275be1da
IT
9916extern __inline __mmask16
9917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9919{
9920 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9921 (__v16si) __Y, 1,
9922 (__mmask16) __M);
9923}
9924
d256b866
IT
9925extern __inline __mmask16
9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9928{
9929 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9930 (__v16si) __Y, 1,
9931 (__mmask16) -1);
9932}
9933
275be1da
IT
9934extern __inline __mmask16
9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9937{
9938 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9939 (__v16si) __Y, 1,
9940 (__mmask16) __M);
9941}
9942
d256b866
IT
9943extern __inline __mmask16
9944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9945_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9946{
9947 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9948 (__v16si) __Y, 1,
9949 (__mmask16) -1);
9950}
9951
275be1da
IT
9952extern __inline __mmask8
9953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9954_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9955{
9956 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9957 (__v8di) __Y, 1,
9958 (__mmask8) __M);
9959}
9960
d256b866
IT
9961extern __inline __mmask8
9962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9963_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9964{
9965 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9966 (__v8di) __Y, 1,
9967 (__mmask8) -1);
9968}
9969
275be1da
IT
9970extern __inline __mmask8
9971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9972_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9973{
9974 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9975 (__v8di) __Y, 1,
9976 (__mmask8) __M);
9977}
9978
d256b866
IT
9979extern __inline __mmask8
9980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9981_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9982{
9983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9984 (__v8di) __Y, 1,
9985 (__mmask8) -1);
9986}
9987
9988extern __inline __mmask16
9989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9990_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9991{
9992 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9993 (__v16si) __Y, 4,
9994 (__mmask16) -1);
9995}
9996
275be1da
IT
9997extern __inline __mmask16
9998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9999_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
10000{
10001 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
10002 (__v16si) __Y, 4,
10003 (__mmask16) __M);
10004}
10005
10006extern __inline __mmask16
10007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10008_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
10009{
10010 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10011 (__v16si) __Y, 4,
10012 (__mmask16) __M);
10013}
10014
d256b866
IT
10015extern __inline __mmask16
10016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10017_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
10018{
10019 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10020 (__v16si) __Y, 4,
10021 (__mmask16) -1);
10022}
10023
275be1da
IT
10024extern __inline __mmask8
10025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 10026_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
275be1da
IT
10027{
10028 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10029 (__v8di) __Y, 4,
10030 (__mmask8) __M);
10031}
10032
d256b866
IT
10033extern __inline __mmask8
10034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10035_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
10036{
10037 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10038 (__v8di) __Y, 4,
10039 (__mmask8) -1);
10040}
10041
275be1da
IT
10042extern __inline __mmask8
10043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
10045{
10046 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10047 (__v8di) __Y, 4,
10048 (__mmask8) __M);
10049}
10050
d256b866
IT
10051extern __inline __mmask8
10052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
10054{
10055 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10056 (__v8di) __Y, 4,
10057 (__mmask8) -1);
10058}
10059
756c5857
AI
/* Integer-compare predicate codes.  Two pairs of names share a value:
   NLT/GE are both 0x5 and NLE/GT are both 0x6.  Kept as preprocessor
   macros so that #ifdef tests on these names keep working.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5   /* same code as _MM_CMPINT_NLT */
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6   /* same code as _MM_CMPINT_NLE */
10069
10070#ifdef __OPTIMIZE__
d8ea3e7c
AS
10071extern __inline __mmask16
10072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10073_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
10074{
10075 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
10076 (__mmask8) __B);
10077}
10078
10079extern __inline __mmask16
10080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
10082{
10083 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
10084 (__mmask8) __B);
10085}
10086
756c5857
AI
10087extern __inline __mmask8
10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
10090{
10091 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10092 (__v8di) __Y, __P,
10093 (__mmask8) -1);
10094}
10095
10096extern __inline __mmask16
10097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
10099{
10100 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
10101 (__v16si) __Y, __P,
10102 (__mmask16) -1);
10103}
10104
10105extern __inline __mmask8
10106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10107_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
10108{
10109 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10110 (__v8di) __Y, __P,
10111 (__mmask8) -1);
10112}
10113
10114extern __inline __mmask16
10115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10116_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
10117{
10118 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10119 (__v16si) __Y, __P,
10120 (__mmask16) -1);
10121}
10122
10123extern __inline __mmask8
10124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
10126 const int __R)
10127{
10128 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
10129 (__v8df) __Y, __P,
10130 (__mmask8) -1, __R);
10131}
10132
10133extern __inline __mmask16
10134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10135_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
10136{
10137 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
10138 (__v16sf) __Y, __P,
10139 (__mmask16) -1, __R);
10140}
10141
10142extern __inline __mmask8
10143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10144_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
10145 const int __P)
10146{
10147 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
10148 (__v8di) __Y, __P,
10149 (__mmask8) __U);
10150}
10151
10152extern __inline __mmask16
10153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10154_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
10155 const int __P)
10156{
10157 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
10158 (__v16si) __Y, __P,
10159 (__mmask16) __U);
10160}
10161
10162extern __inline __mmask8
10163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
10165 const int __P)
10166{
10167 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
10168 (__v8di) __Y, __P,
10169 (__mmask8) __U);
10170}
10171
10172extern __inline __mmask16
10173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10174_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
10175 const int __P)
10176{
10177 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
10178 (__v16si) __Y, __P,
10179 (__mmask16) __U);
10180}
10181
10182extern __inline __mmask8
10183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10184_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
10185 const int __P, const int __R)
10186{
10187 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
10188 (__v8df) __Y, __P,
10189 (__mmask8) __U, __R);
10190}
10191
10192extern __inline __mmask16
10193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
10195 const int __P, const int __R)
10196{
10197 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
10198 (__v16sf) __Y, __P,
10199 (__mmask16) __U, __R);
10200}
10201
10202extern __inline __mmask8
10203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
10205{
10206 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10207 (__v2df) __Y, __P,
10208 (__mmask8) -1, __R);
10209}
10210
10211extern __inline __mmask8
10212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10213_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
10214 const int __P, const int __R)
10215{
10216 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10217 (__v2df) __Y, __P,
10218 (__mmask8) __M, __R);
10219}
10220
10221extern __inline __mmask8
10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
10224{
10225 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10226 (__v4sf) __Y, __P,
10227 (__mmask8) -1, __R);
10228}
10229
10230extern __inline __mmask8
10231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
10233 const int __P, const int __R)
10234{
10235 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10236 (__v4sf) __Y, __P,
10237 (__mmask8) __M, __R);
10238}
10239
10240#else
d8ea3e7c
AS
/* Macro forms of the 16-bit mask shifts, used when __OPTIMIZE__ is not
   defined.  X is the mask, Y the shift count.  */
#define _kshiftli_mask16(X, Y)                                          \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X),               \
                                          (__mmask8)(Y)))

#define _kshiftri_mask16(X, Y)                                          \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X),               \
                                          (__mmask8)(Y)))
10246
756c5857
AI
/* Macro forms of the unmasked generic compares, used when __OPTIMIZE__ is
   not defined.  X and Y are the operands, P the predicate, R the rounding
   argument of the _round_ forms; the mask argument is always all ones.  */

#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y),        \
                                           (int)(P),                    \
                                           (__mmask8)-1))

#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y),      \
                                            (int)(P),                   \
                                            (__mmask16)-1))

#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y),       \
                                            (int)(P),                   \
                                            (__mmask8)-1))

#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y),     \
                                             (int)(P),                  \
                                             (__mmask16)-1))

#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y),       \
                                            (int)(P),                   \
                                            (__mmask8)-1,               \
                                            R))

#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y),      \
                                             (int)(P),                  \
                                             (__mmask16)-1,             \
                                             R))
10276
383321ec 10277#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
756c5857
AI
10278 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
10279 (__v8di)(__m512i)(Y), (int)(P),\
0c8217b1 10280 (__mmask8)(M)))
756c5857 10281
383321ec
UB
/* Expands to __builtin_ia32_cmpd512_mask on X and Y (viewed as __v16si)
   with predicate P and M, cast to __mmask16, as the mask operand.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X), \
                                (__v16si) (__m512i) (Y), \
                                (int) (P), (__mmask16) (M)))
756c5857 10286
/* Expands to __builtin_ia32_ucmpq512_mask on X and Y (viewed as __v8di)
   with predicate P and M, cast to __mmask8, as the mask operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X), \
                                 (__v8di) (__m512i) (Y), \
                                 (int) (P), (__mmask8) (M)))
756c5857 10291
383321ec
UB
/* Expands to __builtin_ia32_ucmpd512_mask on X and Y (viewed as __v16si)
   with predicate P and M, cast to __mmask16, as the mask operand.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X), \
                                 (__v16si) (__m512i) (Y), \
                                 (int) (P), (__mmask16) (M)))
756c5857 10296
/* Expands to __builtin_ia32_cmppd512_mask on X and Y (viewed as __v8df)
   with predicate P, M cast to __mmask8 as the mask operand, and R as
   the final operand.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                 (__v8df) (__m512d) (Y), \
                                 (int) (P), (__mmask8) (M), R))
756c5857 10301
/* Expands to __builtin_ia32_cmpps512_mask on X and Y (viewed as __v16sf)
   with predicate P, M cast to __mmask16 as the mask operand, and R as
   the final operand.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                 (__v16sf) (__m512) (Y), \
                                 (int) (P), (__mmask16) (M), R))
756c5857 10306
/* Expands to __builtin_ia32_cmpsd_mask on X and Y (viewed as __v2df)
   with predicate P, a mask operand of (__mmask8) -1, and R as the final
   operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                              (__v2df) (__m128d) (Y), \
                              (int) (P), (__mmask8) -1, R))
10311
/* Expands to __builtin_ia32_cmpsd_mask on X and Y (viewed as __v2df)
   with predicate P, M as the mask operand, and R as the final operand.
   M is cast to __mmask8 explicitly, as the other masked compare macros
   in this file do for their mask operand.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                              (__v2df) (__m128d) (Y), \
                              (int) (P), (__mmask8) (M), R))
10316
/* Expands to __builtin_ia32_cmpss_mask on X and Y (viewed as __v4sf)
   with predicate P, a mask operand of (__mmask8) -1, and R as the final
   operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                              (__v4sf) (__m128) (Y), \
                              (int) (P), (__mmask8) -1, R))
10321
/* Expands to __builtin_ia32_cmpss_mask on X and Y (viewed as __v4sf)
   with predicate P, M as the mask operand, and R as the final operand.
   M is cast to __mmask8 explicitly, as the other masked compare macros
   in this file do for their mask operand.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                              (__v4sf) (__m128) (Y), \
                              (int) (P), (__mmask8) (M), R))
10326#endif
10327
10328#ifdef __OPTIMIZE__
10329extern __inline __m512
10330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10331_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10332{
b5fd0b71
JJ
10333 __m512 __v1_old = _mm512_undefined_ps ();
10334 __mmask16 __mask = 0xFFFF;
756c5857 10335
b5fd0b71 10336 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10337 __addr,
10338 (__v16si) __index,
b5fd0b71 10339 __mask, __scale);
756c5857
AI
10340}
10341
10342extern __inline __m512
10343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 10344_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 10345 __m512i __index, void const *__addr, int __scale)
756c5857 10346{
b5fd0b71 10347 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10348 __addr,
10349 (__v16si) __index,
10350 __mask, __scale);
10351}
10352
10353extern __inline __m512d
10354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10355_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 10356{
b5fd0b71
JJ
10357 __m512d __v1_old = _mm512_undefined_pd ();
10358 __mmask8 __mask = 0xFF;
756c5857 10359
b5fd0b71 10360 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 10361 __addr,
b5fd0b71 10362 (__v8si) __index, __mask,
756c5857
AI
10363 __scale);
10364}
10365
10366extern __inline __m512d
10367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10369 __m256i __index, void const *__addr, int __scale)
756c5857
AI
10370{
10371 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
10372 __addr,
10373 (__v8si) __index,
10374 __mask, __scale);
10375}
10376
10377extern __inline __m256
10378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10379_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10380{
b5fd0b71
JJ
10381 __m256 __v1_old = _mm256_undefined_ps ();
10382 __mmask8 __mask = 0xFF;
756c5857 10383
b5fd0b71 10384 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 10385 __addr,
b5fd0b71 10386 (__v8di) __index, __mask,
756c5857
AI
10387 __scale);
10388}
10389
10390extern __inline __m256
10391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 10393 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10394{
10395 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
10396 __addr,
10397 (__v8di) __index,
10398 __mask, __scale);
10399}
10400
10401extern __inline __m512d
10402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10403_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 10404{
b5fd0b71
JJ
10405 __m512d __v1_old = _mm512_undefined_pd ();
10406 __mmask8 __mask = 0xFF;
756c5857 10407
b5fd0b71 10408 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 10409 __addr,
b5fd0b71 10410 (__v8di) __index, __mask,
756c5857
AI
10411 __scale);
10412}
10413
10414extern __inline __m512d
10415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10416_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10417 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10418{
10419 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10420 __addr,
10421 (__v8di) __index,
10422 __mask, __scale);
10423}
10424
10425extern __inline __m512i
10426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10427_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10428{
b5fd0b71
JJ
10429 __m512i __v1_old = _mm512_undefined_epi32 ();
10430 __mmask16 __mask = 0xFFFF;
756c5857 10431
b5fd0b71 10432 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
10433 __addr,
10434 (__v16si) __index,
b5fd0b71 10435 __mask, __scale);
756c5857
AI
10436}
10437
10438extern __inline __m512i
10439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10440_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 10441 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10442{
10443 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10444 __addr,
10445 (__v16si) __index,
10446 __mask, __scale);
10447}
10448
10449extern __inline __m512i
10450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10451_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 10452{
b5fd0b71
JJ
10453 __m512i __v1_old = _mm512_undefined_epi32 ();
10454 __mmask8 __mask = 0xFF;
756c5857 10455
b5fd0b71 10456 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 10457 __addr,
b5fd0b71 10458 (__v8si) __index, __mask,
756c5857
AI
10459 __scale);
10460}
10461
10462extern __inline __m512i
10463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10464_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10465 __m256i __index, void const *__addr,
756c5857
AI
10466 int __scale)
10467{
10468 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10469 __addr,
10470 (__v8si) __index,
10471 __mask, __scale);
10472}
10473
10474extern __inline __m256i
10475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10476_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10477{
b5fd0b71
JJ
10478 __m256i __v1_old = _mm256_undefined_si256 ();
10479 __mmask8 __mask = 0xFF;
756c5857 10480
b5fd0b71 10481 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
10482 __addr,
10483 (__v8di) __index,
b5fd0b71 10484 __mask, __scale);
756c5857
AI
10485}
10486
10487extern __inline __m256i
10488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 10490 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10491{
10492 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10493 __addr,
10494 (__v8di) __index,
10495 __mask, __scale);
10496}
10497
10498extern __inline __m512i
10499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10500_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 10501{
b5fd0b71
JJ
10502 __m512i __v1_old = _mm512_undefined_epi32 ();
10503 __mmask8 __mask = 0xFF;
756c5857 10504
b5fd0b71 10505 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 10506 __addr,
b5fd0b71 10507 (__v8di) __index, __mask,
756c5857
AI
10508 __scale);
10509}
10510
10511extern __inline __m512i
10512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10513_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10514 __m512i __index, void const *__addr,
756c5857
AI
10515 int __scale)
10516{
10517 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10518 __addr,
10519 (__v8di) __index,
10520 __mask, __scale);
10521}
10522
10523extern __inline void
10524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10525_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
10526{
10527 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10528 (__v16si) __index, (__v16sf) __v1, __scale);
10529}
10530
10531extern __inline void
10532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10533_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
10534 __m512i __index, __m512 __v1, int __scale)
10535{
10536 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10537 (__v16sf) __v1, __scale);
10538}
10539
10540extern __inline void
10541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10542_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
10543 int __scale)
10544{
10545 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10546 (__v8si) __index, (__v8df) __v1, __scale);
10547}
10548
10549extern __inline void
10550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10551_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10552 __m256i __index, __m512d __v1, int __scale)
10553{
10554 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10555 (__v8df) __v1, __scale);
10556}
10557
10558extern __inline void
10559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10560_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
10561{
10562 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10563 (__v8di) __index, (__v8sf) __v1, __scale);
10564}
10565
10566extern __inline void
10567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10568_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
10569 __m512i __index, __m256 __v1, int __scale)
10570{
10571 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10572 (__v8sf) __v1, __scale);
10573}
10574
10575extern __inline void
10576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10577_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
10578 int __scale)
10579{
10580 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10581 (__v8di) __index, (__v8df) __v1, __scale);
10582}
10583
10584extern __inline void
10585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10586_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10587 __m512i __index, __m512d __v1, int __scale)
10588{
10589 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10590 (__v8df) __v1, __scale);
10591}
10592
10593extern __inline void
10594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10595_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10596 __m512i __v1, int __scale)
10597{
10598 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10599 (__v16si) __index, (__v16si) __v1, __scale);
10600}
10601
10602extern __inline void
10603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10604_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
10605 __m512i __index, __m512i __v1, int __scale)
10606{
10607 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10608 (__v16si) __v1, __scale);
10609}
10610
10611extern __inline void
10612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10613_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
10614 __m512i __v1, int __scale)
10615{
10616 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10617 (__v8si) __index, (__v8di) __v1, __scale);
10618}
10619
10620extern __inline void
10621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10622_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10623 __m256i __index, __m512i __v1, int __scale)
10624{
10625 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10626 (__v8di) __v1, __scale);
10627}
10628
10629extern __inline void
10630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10631_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10632 __m256i __v1, int __scale)
10633{
10634 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10635 (__v8di) __index, (__v8si) __v1, __scale);
10636}
10637
10638extern __inline void
10639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10640_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
10641 __m512i __index, __m256i __v1, int __scale)
10642{
10643 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10644 (__v8si) __v1, __scale);
10645}
10646
10647extern __inline void
10648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10649_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
10650 __m512i __v1, int __scale)
10651{
10652 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10653 (__v8di) __index, (__v8di) __v1, __scale);
10654}
10655
10656extern __inline void
10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10658_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10659 __m512i __index, __m512i __v1, int __scale)
10660{
10661 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10662 (__v8di) __v1, __scale);
10663}
10664#else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv16sf with a constant 0xFFFF mask and
   _mm512_undefined_ps () as the first operand.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) \
   __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                 (void const *) (ADDR), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__mmask16) 0xFFFF, \
                                 (int) (SCALE)))
756c5857
AI
10671
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv16sf with V1OLD viewed as __v16sf, INDEX
   viewed as __v16si and MASK cast to __mmask16.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) \
   __builtin_ia32_gathersiv16sf ((__v16sf) (__m512) (V1OLD), \
                                 (void const *) (ADDR), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__mmask16) (MASK), \
                                 (int) (SCALE)))
756c5857
AI
10678
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv8df with a constant 0xFF mask and
   _mm512_undefined_pd () as the first operand.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) \
   __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                (void const *) (ADDR), \
                                (__v8si) (__m256i) (INDEX), \
                                (__mmask8) 0xFF, \
                                (int) (SCALE)))
756c5857
AI
10684
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv8df with V1OLD viewed as __v8df, INDEX viewed
   as __v8si and MASK cast to __mmask8.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) \
   __builtin_ia32_gathersiv8df ((__v8df) (__m512d) (V1OLD), \
                                (void const *) (ADDR), \
                                (__v8si) (__m256i) (INDEX), \
                                (__mmask8) (MASK), \
                                (int) (SCALE)))
756c5857
AI
10691
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv16sf with a constant 0xFF mask and
   _mm256_undefined_ps () as the first operand.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) \
   __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                 (void const *) (ADDR), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__mmask8) 0xFF, \
                                 (int) (SCALE)))
756c5857
AI
10697
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv16sf with V1OLD viewed as __v8sf, INDEX
   viewed as __v8di and MASK cast to __mmask8.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) \
   __builtin_ia32_gatherdiv16sf ((__v8sf) (__m256) (V1OLD), \
                                 (void const *) (ADDR), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__mmask8) (MASK), \
                                 (int) (SCALE)))
756c5857
AI
10704
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv8df with a constant 0xFF mask and
   _mm512_undefined_pd () as the first operand.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) \
   __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                (void const *) (ADDR), \
                                (__v8di) (__m512i) (INDEX), \
                                (__mmask8) 0xFF, \
                                (int) (SCALE)))
756c5857
AI
10710
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv8df with V1OLD viewed as __v8df, INDEX viewed
   as __v8di and MASK cast to __mmask8.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) \
   __builtin_ia32_gatherdiv8df ((__v8df) (__m512d) (V1OLD), \
                                (void const *) (ADDR), \
                                (__v8di) (__m512i) (INDEX), \
                                (__mmask8) (MASK), \
                                (int) (SCALE)))
756c5857
AI
10717
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv16si with a constant 0xFFFF mask and
   _mm512_undefined_epi32 () as the first operand.  The whole expansion
   is parenthesized so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_epi32 (), \
                                 (void const *) (ADDR), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__mmask16) 0xFFFF, \
                                 (int) (SCALE)))
756c5857
AI
10724
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv16si with V1OLD and INDEX viewed as __v16si
   and MASK cast to __mmask16.  The whole expansion is parenthesized so
   that a postfix operator written after the macro call applies to the
   cast result, not to the builtin call.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gathersiv16si ((__v16si) (__m512i) (V1OLD), \
                                 (void const *) (ADDR), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__mmask16) (MASK), \
                                 (int) (SCALE)))
756c5857
AI
10731
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv8di with a constant 0xFF mask and
   _mm512_undefined_epi32 () as the first operand.  The whole expansion
   is parenthesized so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                (void const *) (ADDR), \
                                (__v8si) (__m256i) (INDEX), \
                                (__mmask8) 0xFF, \
                                (int) (SCALE)))
756c5857
AI
10737
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gathersiv8di with V1OLD viewed as __v8di, INDEX viewed
   as __v8si and MASK cast to __mmask8.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gathersiv8di ((__v8di) (__m512i) (V1OLD), \
                                (void const *) (ADDR), \
                                (__v8si) (__m256i) (INDEX), \
                                (__mmask8) (MASK), \
                                (int) (SCALE)))
10744
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv16si with a constant 0xFF mask and
   _mm256_undefined_si256 () as the first operand.  The whole expansion
   is parenthesized so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) \
   __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                 (void const *) (ADDR), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__mmask8) 0xFF, \
                                 (int) (SCALE)))
756c5857
AI
10750
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv16si with V1OLD viewed as __v8si, INDEX
   viewed as __v8di and MASK cast to __mmask8.  The whole expansion is
   parenthesized so that a postfix operator written after the macro call
   applies to the cast result, not to the builtin call.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) \
   __builtin_ia32_gatherdiv16si ((__v8si) (__m256i) (V1OLD), \
                                 (void const *) (ADDR), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__mmask8) (MASK), \
                                 (int) (SCALE)))
756c5857
AI
10757
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv8di with a constant 0xFF mask and
   _mm512_undefined_epi32 () as the first operand.  The whole expansion
   is parenthesized so that a postfix operator written after the macro
   call applies to the cast result, not to the builtin call.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                (void const *) (ADDR), \
                                (__v8di) (__m512i) (INDEX), \
                                (__mmask8) 0xFF, \
                                (int) (SCALE)))
756c5857
AI
10763
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherdiv8di with V1OLD and INDEX viewed as __v8di and
   MASK cast to __mmask8.  The whole expansion is parenthesized so that
   a postfix operator written after the macro call applies to the cast
   result, not to the builtin call.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) \
   __builtin_ia32_gatherdiv8di ((__v8di) (__m512i) (V1OLD), \
                                (void const *) (ADDR), \
                                (__v8di) (__m512i) (INDEX), \
                                (__mmask8) (MASK), \
                                (int) (SCALE)))
756c5857
AI
10770
/* Macro form: expands to __builtin_ia32_scattersiv16sf with a constant
   0xFFFF mask, INDEX viewed as __v16si and V1 viewed as __v16sf.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), \
                                 (__mmask16) 0xFFFF, \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__v16sf) (__m512) (V1), \
                                 (int) (SCALE))
756c5857
AI
10775
/* Macro form: expands to __builtin_ia32_scattersiv16sf with MASK cast
   to __mmask16, INDEX viewed as __v16si and V1 viewed as __v16sf.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), \
                                 (__mmask16) (MASK), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__v16sf) (__m512) (V1), \
                                 (int) (SCALE))
756c5857
AI
10780
/* Macro form: expands to __builtin_ia32_scattersiv8df with a constant
   0xFF mask, INDEX viewed as __v8si and V1 viewed as __v8df.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), \
                                (__mmask8) 0xFF, \
                                (__v8si) (__m256i) (INDEX), \
                                (__v8df) (__m512d) (V1), \
                                (int) (SCALE))
756c5857
AI
10785
/* Macro form: expands to __builtin_ia32_scattersiv8df with MASK cast
   to __mmask8, INDEX viewed as __v8si and V1 viewed as __v8df.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), \
                                (__mmask8) (MASK), \
                                (__v8si) (__m256i) (INDEX), \
                                (__v8df) (__m512d) (V1), \
                                (int) (SCALE))
756c5857
AI
10790
/* Macro form: expands to __builtin_ia32_scatterdiv16sf with a constant
   0xFF mask, INDEX viewed as __v8di and V1 viewed as __v8sf.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), \
                                 (__mmask8) 0xFF, \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__v8sf) (__m256) (V1), \
                                 (int) (SCALE))
756c5857
AI
10795
/* Macro form: expands to __builtin_ia32_scatterdiv16sf with INDEX
   viewed as __v8di and V1 viewed as __v8sf.  MASK is cast to __mmask8,
   the mask type that the inline-function form of this intrinsic
   declares and that the unmasked macro uses for its 0xFF constant.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), \
                                 (__mmask8) (MASK), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__v8sf) (__m256) (V1), \
                                 (int) (SCALE))
756c5857
AI
10800
/* Macro form: expands to __builtin_ia32_scatterdiv8df with a constant
   0xFF mask, INDEX viewed as __v8di and V1 viewed as __v8df.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), \
                                (__mmask8) 0xFF, \
                                (__v8di) (__m512i) (INDEX), \
                                (__v8df) (__m512d) (V1), \
                                (int) (SCALE))
756c5857
AI
10805
/* Macro form: expands to __builtin_ia32_scatterdiv8df with MASK cast
   to __mmask8, INDEX viewed as __v8di and V1 viewed as __v8df.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), \
                                (__mmask8) (MASK), \
                                (__v8di) (__m512i) (INDEX), \
                                (__v8df) (__m512d) (V1), \
                                (int) (SCALE))
756c5857
AI
10810
/* Macro form: expands to __builtin_ia32_scattersiv16si with a constant
   0xFFFF mask and both INDEX and V1 viewed as __v16si.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), \
                                 (__mmask16) 0xFFFF, \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__v16si) (__m512i) (V1), \
                                 (int) (SCALE))
756c5857
AI
10815
/* Macro form: expands to __builtin_ia32_scattersiv16si with MASK cast
   to __mmask16 and both INDEX and V1 viewed as __v16si.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), \
                                 (__mmask16) (MASK), \
                                 (__v16si) (__m512i) (INDEX), \
                                 (__v16si) (__m512i) (V1), \
                                 (int) (SCALE))
756c5857
AI
10820
/* Macro form: expands to __builtin_ia32_scattersiv8di with a constant
   0xFF mask, INDEX viewed as __v8si and V1 viewed as __v8di.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), \
                                (__mmask8) 0xFF, \
                                (__v8si) (__m256i) (INDEX), \
                                (__v8di) (__m512i) (V1), \
                                (int) (SCALE))
756c5857
AI
10825
/* Macro form: expands to __builtin_ia32_scattersiv8di with MASK cast
   to __mmask8, INDEX viewed as __v8si and V1 viewed as __v8di.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), \
                                (__mmask8) (MASK), \
                                (__v8si) (__m256i) (INDEX), \
                                (__v8di) (__m512i) (V1), \
                                (int) (SCALE))
756c5857
AI
10830
/* Macro form: expands to __builtin_ia32_scatterdiv16si with a constant
   0xFF mask, INDEX viewed as __v8di and V1 viewed as __v8si.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), \
                                 (__mmask8) 0xFF, \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__v8si) (__m256i) (V1), \
                                 (int) (SCALE))
756c5857
AI
10835
/* Macro form: expands to __builtin_ia32_scatterdiv16si with MASK cast
   to __mmask8, INDEX viewed as __v8di and V1 viewed as __v8si.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), \
                                 (__mmask8) (MASK), \
                                 (__v8di) (__m512i) (INDEX), \
                                 (__v8si) (__m256i) (V1), \
                                 (int) (SCALE))
756c5857
AI
10840
/* Macro form: expands to __builtin_ia32_scatterdiv8di with a constant
   0xFF mask and both INDEX and V1 viewed as __v8di.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), \
                                (__mmask8) 0xFF, \
                                (__v8di) (__m512i) (INDEX), \
                                (__v8di) (__m512i) (V1), \
                                (int) (SCALE))
756c5857
AI
10845
/* Macro form: expands to __builtin_ia32_scatterdiv8di with MASK cast
   to __mmask8 and both INDEX and V1 viewed as __v8di.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), \
                                (__mmask8) (MASK), \
                                (__v8di) (__m512i) (INDEX), \
                                (__v8di) (__m512i) (V1), \
                                (int) (SCALE))
756c5857
AI
10850#endif
10851
10852extern __inline __m512d
10853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10854_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10855{
10856 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10857 (__v8df) __W,
10858 (__mmask8) __U);
10859}
10860
10861extern __inline __m512d
10862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10864{
10865 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10866 (__v8df)
10867 _mm512_setzero_pd (),
10868 (__mmask8) __U);
10869}
10870
10871extern __inline void
10872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10874{
10875 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10876 (__mmask8) __U);
10877}
10878
10879extern __inline __m512
10880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10881_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10882{
10883 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10884 (__v16sf) __W,
10885 (__mmask16) __U);
10886}
10887
10888extern __inline __m512
10889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10890_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10891{
10892 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10893 (__v16sf)
10894 _mm512_setzero_ps (),
10895 (__mmask16) __U);
10896}
10897
10898extern __inline void
10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10901{
10902 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10903 (__mmask16) __U);
10904}
10905
10906extern __inline __m512i
10907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10909{
10910 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10911 (__v8di) __W,
10912 (__mmask8) __U);
10913}
10914
10915extern __inline __m512i
10916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10917_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10918{
10919 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10920 (__v8di)
10921 _mm512_setzero_si512 (),
10922 (__mmask8) __U);
10923}
10924
10925extern __inline void
10926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10927_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10928{
10929 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10930 (__mmask8) __U);
10931}
10932
10933extern __inline __m512i
10934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10935_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10936{
10937 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10938 (__v16si) __W,
10939 (__mmask16) __U);
10940}
10941
10942extern __inline __m512i
10943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10944_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10945{
10946 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10947 (__v16si)
10948 _mm512_setzero_si512 (),
10949 (__mmask16) __U);
10950}
10951
10952extern __inline void
10953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10954_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10955{
10956 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10957 (__mmask16) __U);
10958}
10959
10960extern __inline __m512d
10961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10962_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10963{
10964 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10965 (__v8df) __W,
10966 (__mmask8) __U);
10967}
10968
10969extern __inline __m512d
10970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10971_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10972{
10973 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10974 (__v8df)
10975 _mm512_setzero_pd (),
10976 (__mmask8) __U);
10977}
10978
10979extern __inline __m512d
10980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10981_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10982{
10983 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10984 (__v8df) __W,
10985 (__mmask8) __U);
10986}
10987
10988extern __inline __m512d
10989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10990_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10991{
10992 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10993 (__v8df)
10994 _mm512_setzero_pd (),
10995 (__mmask8) __U);
10996}
10997
10998extern __inline __m512
10999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
11001{
11002 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
11003 (__v16sf) __W,
11004 (__mmask16) __U);
11005}
11006
11007extern __inline __m512
11008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11009_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
11010{
11011 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
11012 (__v16sf)
11013 _mm512_setzero_ps (),
11014 (__mmask16) __U);
11015}
11016
11017extern __inline __m512
11018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11019_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
11020{
11021 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
11022 (__v16sf) __W,
11023 (__mmask16) __U);
11024}
11025
11026extern __inline __m512
11027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11028_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
11029{
11030 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
11031 (__v16sf)
11032 _mm512_setzero_ps (),
11033 (__mmask16) __U);
11034}
11035
11036extern __inline __m512i
11037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11038_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
11039{
11040 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
11041 (__v8di) __W,
11042 (__mmask8) __U);
11043}
11044
11045extern __inline __m512i
11046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
11048{
11049 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
11050 (__v8di)
11051 _mm512_setzero_si512 (),
11052 (__mmask8) __U);
11053}
11054
11055extern __inline __m512i
11056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
11058{
11059 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
11060 (__v8di) __W,
11061 (__mmask8) __U);
11062}
11063
11064extern __inline __m512i
11065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11066_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
11067{
11068 return (__m512i)
11069 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
11070 (__v8di)
11071 _mm512_setzero_si512 (),
11072 (__mmask8) __U);
11073}
11074
11075extern __inline __m512i
11076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
11078{
11079 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
11080 (__v16si) __W,
11081 (__mmask16) __U);
11082}
11083
11084extern __inline __m512i
11085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11086_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
11087{
11088 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
11089 (__v16si)
11090 _mm512_setzero_si512 (),
11091 (__mmask16) __U);
11092}
11093
11094extern __inline __m512i
11095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11096_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
11097{
11098 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
11099 (__v16si) __W,
11100 (__mmask16) __U);
11101}
11102
11103extern __inline __m512i
11104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11105_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
11106{
11107 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
11108 (__v16si)
11109 _mm512_setzero_si512
11110 (), (__mmask16) __U);
11111}
11112
11113/* Mask arithmetic operations */
6901ea62
AS
/* The _k<op>_mask16 spellings are plain aliases for the corresponding
   _mm512_k<op> names.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
11120
dea06111
AS
11121extern __inline unsigned char
11122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11123_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
11124{
11125 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
11126 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
11127}
11128
11129extern __inline unsigned char
11130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11131_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
11132{
11133 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
11134 (__mmask16) __B);
11135}
11136
11137extern __inline unsigned char
11138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
11140{
11141 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
11142 (__mmask16) __B);
11143}
11144
7cdb6e4c
AS
11145extern __inline unsigned int
11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147_cvtmask16_u32 (__mmask16 __A)
11148{
11149 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
11150}
11151
11152extern __inline __mmask16
11153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11154_cvtu32_mask16 (unsigned int __A)
11155{
11156 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
11157}
11158
11159extern __inline __mmask16
11160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11161_load_mask16 (__mmask16 *__A)
11162{
11163 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
11164}
11165
11166extern __inline void
11167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11168_store_mask16 (__mmask16 *__A, __mmask16 __B)
11169{
11170 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
11171}
11172
756c5857
AI
11173extern __inline __mmask16
11174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11175_mm512_kand (__mmask16 __A, __mmask16 __B)
11176{
11177 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
11178}
11179
11180extern __inline __mmask16
11181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11182_mm512_kandn (__mmask16 __A, __mmask16 __B)
11183{
6901ea62
AS
11184 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
11185 (__mmask16) __B);
756c5857
AI
11186}
11187
11188extern __inline __mmask16
11189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11190_mm512_kor (__mmask16 __A, __mmask16 __B)
11191{
11192 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
11193}
11194
11195extern __inline int
11196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11197_mm512_kortestz (__mmask16 __A, __mmask16 __B)
11198{
11199 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
11200 (__mmask16) __B);
11201}
11202
11203extern __inline int
11204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11205_mm512_kortestc (__mmask16 __A, __mmask16 __B)
11206{
11207 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
11208 (__mmask16) __B);
11209}
11210
11211extern __inline __mmask16
11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213_mm512_kxnor (__mmask16 __A, __mmask16 __B)
11214{
11215 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
11216}
11217
11218extern __inline __mmask16
11219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11220_mm512_kxor (__mmask16 __A, __mmask16 __B)
11221{
11222 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
11223}
11224
11225extern __inline __mmask16
11226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11227_mm512_knot (__mmask16 __A)
11228{
11229 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
11230}
11231
11232extern __inline __mmask16
11233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11234_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
11235{
11236 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11237}
11238
6901ea62
AS
11239extern __inline __mmask16
11240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11241_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
11242{
11243 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11244}
11245
756c5857
AI
/* Masked 128-bit insert wrappers over __builtin_ia32_inserti32x4_mask
   and __builtin_ia32_insertf32x4_mask.  When __OPTIMIZE__ is defined
   they are inline functions; otherwise they are macros that forward
   their arguments textually (presumably so that the immediate still
   reaches the builtin as a constant expression -- confirm against the
   builtin's requirements).  The maskz forms pass an all-zero vector as
   the fourth operand; the mask forms pass their first argument.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
                          const int __imm)
{
  __v16si __vc = (__v16si) __C;
  __v4si __vd = (__v4si) __D;
  __v16si __vz = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_inserti32x4_mask (__vc, __vd, __imm,
                                                    __vz, __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
                          const int __imm)
{
  __v16sf __vc = (__v16sf) __C;
  __v4sf __vd = (__v4sf) __D;
  __v16sf __vz = (__v16sf) _mm512_setzero_ps ();

  return (__m512) __builtin_ia32_insertf32x4_mask (__vc, __vd, __imm,
                                                   __vz, __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
                         __m128i __D, const int __imm)
{
  __v16si __vc = (__v16si) __C;
  __v4si __vd = (__v4si) __D;
  __v16si __va = (__v16si) __A;

  return (__m512i) __builtin_ia32_inserti32x4_mask (__vc, __vd, __imm,
                                                    __va, __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
                         __m128 __D, const int __imm)
{
  __v16sf __vc = (__v16sf) __C;
  __v4sf __vd = (__v4sf) __D;
  __v16sf __va = (__v16sf) __A;

  return (__m512) __builtin_ia32_insertf32x4_mask (__vc, __vd, __imm,
                                                   __va, __B);
}
#else
#define _mm512_maskz_insertf32x4(A, X, Y, C)                    \
  ((__m512) __builtin_ia32_insertf32x4_mask (                   \
     (__v16sf)(__m512) (X),                                     \
     (__v4sf)(__m128) (Y),                                      \
     (int) (C),                                                 \
     (__v16sf)_mm512_setzero_ps(),                              \
     (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                    \
  ((__m512i) __builtin_ia32_inserti32x4_mask (                  \
     (__v16si)(__m512i) (X),                                    \
     (__v4si)(__m128i) (Y),                                     \
     (int) (C),                                                 \
     (__v16si)_mm512_setzero_si512 (),                          \
     (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                  \
  ((__m512) __builtin_ia32_insertf32x4_mask (                   \
     (__v16sf)(__m512) (X),                                     \
     (__v4sf)(__m128) (Y),                                      \
     (int) (C),                                                 \
     (__v16sf)(__m512) (A),                                     \
     (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                  \
  ((__m512i) __builtin_ia32_inserti32x4_mask (                  \
     (__v16si)(__m512i) (X),                                    \
     (__v4si)(__m128i) (Y),                                     \
     (int) (C),                                                 \
     (__v16si)(__m512i) (A),                                    \
     (__mmask16)(B)))
#endif
11315
11316extern __inline __m512i
11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318_mm512_max_epi64 (__m512i __A, __m512i __B)
11319{
11320 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11321 (__v8di) __B,
11322 (__v8di)
4271e5cb 11323 _mm512_undefined_epi32 (),
756c5857
AI
11324 (__mmask8) -1);
11325}
11326
11327extern __inline __m512i
11328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11329_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11330{
11331 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11332 (__v8di) __B,
11333 (__v8di)
11334 _mm512_setzero_si512 (),
11335 __M);
11336}
11337
11338extern __inline __m512i
11339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11340_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11341{
11342 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11343 (__v8di) __B,
11344 (__v8di) __W, __M);
11345}
11346
11347extern __inline __m512i
11348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11349_mm512_min_epi64 (__m512i __A, __m512i __B)
11350{
11351 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11352 (__v8di) __B,
11353 (__v8di)
4271e5cb 11354 _mm512_undefined_epi32 (),
756c5857
AI
11355 (__mmask8) -1);
11356}
11357
11358extern __inline __m512i
11359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11360_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11361{
11362 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11363 (__v8di) __B,
11364 (__v8di) __W, __M);
11365}
11366
11367extern __inline __m512i
11368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11369_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11370{
11371 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11372 (__v8di) __B,
11373 (__v8di)
11374 _mm512_setzero_si512 (),
11375 __M);
11376}
11377
11378extern __inline __m512i
11379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380_mm512_max_epu64 (__m512i __A, __m512i __B)
11381{
11382 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11383 (__v8di) __B,
11384 (__v8di)
4271e5cb 11385 _mm512_undefined_epi32 (),
756c5857
AI
11386 (__mmask8) -1);
11387}
11388
11389extern __inline __m512i
11390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11392{
11393 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11394 (__v8di) __B,
11395 (__v8di)
11396 _mm512_setzero_si512 (),
11397 __M);
11398}
11399
11400extern __inline __m512i
11401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11403{
11404 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11405 (__v8di) __B,
11406 (__v8di) __W, __M);
11407}
11408
11409extern __inline __m512i
11410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411_mm512_min_epu64 (__m512i __A, __m512i __B)
11412{
11413 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11414 (__v8di) __B,
11415 (__v8di)
4271e5cb 11416 _mm512_undefined_epi32 (),
756c5857
AI
11417 (__mmask8) -1);
11418}
11419
11420extern __inline __m512i
11421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11423{
11424 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11425 (__v8di) __B,
11426 (__v8di) __W, __M);
11427}
11428
11429extern __inline __m512i
11430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11431_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11432{
11433 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11434 (__v8di) __B,
11435 (__v8di)
11436 _mm512_setzero_si512 (),
11437 __M);
11438}
11439
11440extern __inline __m512i
11441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11442_mm512_max_epi32 (__m512i __A, __m512i __B)
11443{
11444 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11445 (__v16si) __B,
11446 (__v16si)
4271e5cb 11447 _mm512_undefined_epi32 (),
756c5857
AI
11448 (__mmask16) -1);
11449}
11450
11451extern __inline __m512i
11452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11453_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11454{
11455 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11456 (__v16si) __B,
11457 (__v16si)
11458 _mm512_setzero_si512 (),
11459 __M);
11460}
11461
11462extern __inline __m512i
11463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11464_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11465{
11466 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11467 (__v16si) __B,
11468 (__v16si) __W, __M);
11469}
11470
11471extern __inline __m512i
11472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11473_mm512_min_epi32 (__m512i __A, __m512i __B)
11474{
11475 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11476 (__v16si) __B,
11477 (__v16si)
4271e5cb 11478 _mm512_undefined_epi32 (),
756c5857
AI
11479 (__mmask16) -1);
11480}
11481
11482extern __inline __m512i
11483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11484_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11485{
11486 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11487 (__v16si) __B,
11488 (__v16si)
11489 _mm512_setzero_si512 (),
11490 __M);
11491}
11492
11493extern __inline __m512i
11494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11496{
11497 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11498 (__v16si) __B,
11499 (__v16si) __W, __M);
11500}
11501
11502extern __inline __m512i
11503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504_mm512_max_epu32 (__m512i __A, __m512i __B)
11505{
11506 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11507 (__v16si) __B,
11508 (__v16si)
4271e5cb 11509 _mm512_undefined_epi32 (),
756c5857
AI
11510 (__mmask16) -1);
11511}
11512
11513extern __inline __m512i
11514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11515_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11516{
11517 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11518 (__v16si) __B,
11519 (__v16si)
11520 _mm512_setzero_si512 (),
11521 __M);
11522}
11523
11524extern __inline __m512i
11525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11526_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11527{
11528 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11529 (__v16si) __B,
11530 (__v16si) __W, __M);
11531}
11532
11533extern __inline __m512i
11534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11535_mm512_min_epu32 (__m512i __A, __m512i __B)
11536{
11537 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11538 (__v16si) __B,
11539 (__v16si)
4271e5cb 11540 _mm512_undefined_epi32 (),
756c5857
AI
11541 (__mmask16) -1);
11542}
11543
11544extern __inline __m512i
11545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11546_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11547{
11548 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11549 (__v16si) __B,
11550 (__v16si)
11551 _mm512_setzero_si512 (),
11552 __M);
11553}
11554
11555extern __inline __m512i
11556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11557_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11558{
11559 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11560 (__v16si) __B,
11561 (__v16si) __W, __M);
11562}
11563
11564extern __inline __m512
11565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566_mm512_unpacklo_ps (__m512 __A, __m512 __B)
11567{
11568 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11569 (__v16sf) __B,
11570 (__v16sf)
0b192937 11571 _mm512_undefined_ps (),
756c5857
AI
11572 (__mmask16) -1);
11573}
11574
11575extern __inline __m512
11576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11578{
11579 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11580 (__v16sf) __B,
11581 (__v16sf) __W,
11582 (__mmask16) __U);
11583}
11584
11585extern __inline __m512
11586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11588{
11589 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11590 (__v16sf) __B,
11591 (__v16sf)
11592 _mm512_setzero_ps (),
11593 (__mmask16) __U);
11594}
11595
075691af
AI
/* Scalar max/min wrappers that take an explicit rounding argument.
   When __OPTIMIZE__ is defined they are inline functions; otherwise
   they are macros that forward their arguments textually to the same
   builtins.

   Naming: the plain forms call the *_round builtin; the mask forms pass
   W as the third operand of the *_mask_round builtin; the maskz forms
   pass an all-zero vector there instead.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
/* Every expansion is enclosed in its own parentheses.  A cast binds less
   tightly than a postfix operator, so without the outer parentheses an
   expression such as MACRO (...)[0] would subscript the builtin's result
   before the cast is applied, unlike the inline-function forms above.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round ((A), (B), (C)))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round ((A), (B), (C)))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round ((A), (B), (C)))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (U), (C)))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round ((A), (B), (C)))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (U), (C)))

#endif
11763
756c5857
AI
11764extern __inline __m512d
11765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11766_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11767{
11768 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11769 (__v8df) __W,
11770 (__mmask8) __U);
11771}
11772
11773extern __inline __m512
11774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11775_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11776{
11777 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11778 (__v16sf) __W,
11779 (__mmask16) __U);
11780}
11781
11782extern __inline __m512i
11783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11784_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11785{
11786 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11787 (__v8di) __W,
11788 (__mmask8) __U);
11789}
11790
11791extern __inline __m512i
11792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11794{
11795 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11796 (__v16si) __W,
11797 (__mmask16) __U);
11798}
11799
075691af
AI
/* Scalar fused multiply-add wrappers that take an explicit rounding
   argument.  All eight forms call __builtin_ia32_vfmaddsd3_round or
   __builtin_ia32_vfmaddss3_round; the fmsub, fnmadd and fnmsub variants
   are obtained by negating the third operand, the second operand, or
   both before the call.  When __OPTIMIZE__ is defined they are inline
   functions; otherwise they are macros that forward their arguments
   textually.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   (__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  (__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   -(__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  -(__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   (__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  (__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   -(__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  -(__v4sf) __B, __R);
}
#else
/* Every expansion is enclosed in its own parentheses.  A cast binds less
   tightly than a postfix operator, so without the outer parentheses an
   expression such as MACRO (...)[0] would subscript the builtin's result
   before the cast is applied, unlike the inline-function forms above.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), (C), (R)))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), (C), (R)))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), -(C), (R)))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), -(C), (R)))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), (C), (R)))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), (C), (R)))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), -(C), (R)))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), -(C), (R)))
#endif
11905
5c4ade6d
JJ
11906extern __inline __m128d
11907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11909{
11910 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11911 (__v2df) __A,
11912 (__v2df) __B,
11913 (__mmask8) __U,
11914 _MM_FROUND_CUR_DIRECTION);
11915}
11916
11917extern __inline __m128
11918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11919_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11920{
11921 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11922 (__v4sf) __A,
11923 (__v4sf) __B,
11924 (__mmask8) __U,
11925 _MM_FROUND_CUR_DIRECTION);
11926}
11927
11928extern __inline __m128d
11929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11930_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11931{
11932 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11933 (__v2df) __A,
11934 (__v2df) __B,
11935 (__mmask8) __U,
11936 _MM_FROUND_CUR_DIRECTION);
11937}
11938
11939extern __inline __m128
11940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11941_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11942{
11943 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11944 (__v4sf) __A,
11945 (__v4sf) __B,
11946 (__mmask8) __U,
11947 _MM_FROUND_CUR_DIRECTION);
11948}
11949
11950extern __inline __m128d
11951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11952_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11953{
11954 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11955 (__v2df) __A,
11956 (__v2df) __B,
11957 (__mmask8) __U,
11958 _MM_FROUND_CUR_DIRECTION);
11959}
11960
11961extern __inline __m128
11962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11963_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11964{
11965 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11966 (__v4sf) __A,
11967 (__v4sf) __B,
11968 (__mmask8) __U,
11969 _MM_FROUND_CUR_DIRECTION);
11970}
11971
11972extern __inline __m128d
11973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11975{
11976 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11977 (__v2df) __A,
11978 -(__v2df) __B,
11979 (__mmask8) __U,
11980 _MM_FROUND_CUR_DIRECTION);
11981}
11982
11983extern __inline __m128
11984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11985_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11986{
11987 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11988 (__v4sf) __A,
11989 -(__v4sf) __B,
11990 (__mmask8) __U,
11991 _MM_FROUND_CUR_DIRECTION);
11992}
11993
11994extern __inline __m128d
11995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11996_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11997{
11998 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11999 (__v2df) __A,
12000 (__v2df) __B,
12001 (__mmask8) __U,
12002 _MM_FROUND_CUR_DIRECTION);
12003}
12004
12005extern __inline __m128
12006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12007_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
12008{
12009 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12010 (__v4sf) __A,
12011 (__v4sf) __B,
12012 (__mmask8) __U,
12013 _MM_FROUND_CUR_DIRECTION);
12014}
12015
12016extern __inline __m128d
12017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12018_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
12019{
12020 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12021 (__v2df) __A,
12022 -(__v2df) __B,
12023 (__mmask8) __U,
12024 _MM_FROUND_CUR_DIRECTION);
12025}
12026
12027extern __inline __m128
12028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12029_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
12030{
12031 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12032 (__v4sf) __A,
12033 -(__v4sf) __B,
12034 (__mmask8) __U,
12035 _MM_FROUND_CUR_DIRECTION);
12036}
12037
12038extern __inline __m128d
12039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12040_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12041{
12042 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12043 -(__v2df) __A,
12044 (__v2df) __B,
12045 (__mmask8) __U,
12046 _MM_FROUND_CUR_DIRECTION);
12047}
12048
12049extern __inline __m128
12050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12051_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12052{
12053 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12054 -(__v4sf) __A,
12055 (__v4sf) __B,
12056 (__mmask8) __U,
12057 _MM_FROUND_CUR_DIRECTION);
12058}
12059
12060extern __inline __m128d
12061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12062_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
12063{
12064 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12065 -(__v2df) __A,
12066 (__v2df) __B,
12067 (__mmask8) __U,
12068 _MM_FROUND_CUR_DIRECTION);
12069}
12070
12071extern __inline __m128
12072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
12074{
12075 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12076 -(__v4sf) __A,
12077 (__v4sf) __B,
12078 (__mmask8) __U,
12079 _MM_FROUND_CUR_DIRECTION);
12080}
12081
12082extern __inline __m128d
12083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
12085{
12086 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12087 -(__v2df) __A,
12088 (__v2df) __B,
12089 (__mmask8) __U,
12090 _MM_FROUND_CUR_DIRECTION);
12091}
12092
12093extern __inline __m128
12094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12095_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
12096{
12097 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12098 -(__v4sf) __A,
12099 (__v4sf) __B,
12100 (__mmask8) __U,
12101 _MM_FROUND_CUR_DIRECTION);
12102}
12103
12104extern __inline __m128d
12105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12107{
12108 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12109 -(__v2df) __A,
12110 -(__v2df) __B,
12111 (__mmask8) __U,
12112 _MM_FROUND_CUR_DIRECTION);
12113}
12114
12115extern __inline __m128
12116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12117_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12118{
12119 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12120 -(__v4sf) __A,
12121 -(__v4sf) __B,
12122 (__mmask8) __U,
12123 _MM_FROUND_CUR_DIRECTION);
12124}
12125
12126extern __inline __m128d
12127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12128_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
12129{
12130 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12131 -(__v2df) __A,
12132 (__v2df) __B,
12133 (__mmask8) __U,
12134 _MM_FROUND_CUR_DIRECTION);
12135}
12136
12137extern __inline __m128
12138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12139_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
12140{
12141 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12142 -(__v4sf) __A,
12143 (__v4sf) __B,
12144 (__mmask8) __U,
12145 _MM_FROUND_CUR_DIRECTION);
12146}
12147
12148extern __inline __m128d
12149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12150_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
12151{
12152 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12153 -(__v2df) __A,
12154 -(__v2df) __B,
12155 (__mmask8) __U,
12156 _MM_FROUND_CUR_DIRECTION);
12157}
12158
12159extern __inline __m128
12160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12161_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
12162{
12163 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12164 -(__v4sf) __A,
12165 -(__v4sf) __B,
12166 (__mmask8) __U,
12167 _MM_FROUND_CUR_DIRECTION);
12168}
12169
12170#ifdef __OPTIMIZE__
12171extern __inline __m128d
12172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12173_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12174 const int __R)
12175{
12176 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12177 (__v2df) __A,
12178 (__v2df) __B,
12179 (__mmask8) __U, __R);
12180}
12181
12182extern __inline __m128
12183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12184_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12185 const int __R)
12186{
12187 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12188 (__v4sf) __A,
12189 (__v4sf) __B,
12190 (__mmask8) __U, __R);
12191}
12192
12193extern __inline __m128d
12194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12195_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12196 const int __R)
12197{
12198 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12199 (__v2df) __A,
12200 (__v2df) __B,
12201 (__mmask8) __U, __R);
12202}
12203
12204extern __inline __m128
12205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12206_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12207 const int __R)
12208{
12209 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12210 (__v4sf) __A,
12211 (__v4sf) __B,
12212 (__mmask8) __U, __R);
12213}
12214
12215extern __inline __m128d
12216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12218 const int __R)
12219{
12220 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12221 (__v2df) __A,
12222 (__v2df) __B,
12223 (__mmask8) __U, __R);
12224}
12225
12226extern __inline __m128
12227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12228_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12229 const int __R)
12230{
12231 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12232 (__v4sf) __A,
12233 (__v4sf) __B,
12234 (__mmask8) __U, __R);
12235}
12236
12237extern __inline __m128d
12238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12239_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12240 const int __R)
12241{
12242 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12243 (__v2df) __A,
12244 -(__v2df) __B,
12245 (__mmask8) __U, __R);
12246}
12247
12248extern __inline __m128
12249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12250_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12251 const int __R)
12252{
12253 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12254 (__v4sf) __A,
12255 -(__v4sf) __B,
12256 (__mmask8) __U, __R);
12257}
12258
12259extern __inline __m128d
12260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12261_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12262 const int __R)
12263{
12264 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12265 (__v2df) __A,
12266 (__v2df) __B,
12267 (__mmask8) __U, __R);
12268}
12269
12270extern __inline __m128
12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12273 const int __R)
12274{
12275 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12276 (__v4sf) __A,
12277 (__v4sf) __B,
12278 (__mmask8) __U, __R);
12279}
12280
12281extern __inline __m128d
12282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12284 const int __R)
12285{
12286 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12287 (__v2df) __A,
12288 -(__v2df) __B,
12289 (__mmask8) __U, __R);
12290}
12291
12292extern __inline __m128
12293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12294_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12295 const int __R)
12296{
12297 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12298 (__v4sf) __A,
12299 -(__v4sf) __B,
12300 (__mmask8) __U, __R);
12301}
12302
12303extern __inline __m128d
12304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12305_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12306 const int __R)
12307{
12308 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12309 -(__v2df) __A,
12310 (__v2df) __B,
12311 (__mmask8) __U, __R);
12312}
12313
12314extern __inline __m128
12315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12316_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12317 const int __R)
12318{
12319 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12320 -(__v4sf) __A,
12321 (__v4sf) __B,
12322 (__mmask8) __U, __R);
12323}
12324
12325extern __inline __m128d
12326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12328 const int __R)
12329{
12330 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12331 -(__v2df) __A,
12332 (__v2df) __B,
12333 (__mmask8) __U, __R);
12334}
12335
12336extern __inline __m128
12337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12338_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12339 const int __R)
12340{
12341 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12342 -(__v4sf) __A,
12343 (__v4sf) __B,
12344 (__mmask8) __U, __R);
12345}
12346
12347extern __inline __m128d
12348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12349_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12350 const int __R)
12351{
12352 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12353 -(__v2df) __A,
12354 (__v2df) __B,
12355 (__mmask8) __U, __R);
12356}
12357
12358extern __inline __m128
12359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12360_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12361 const int __R)
12362{
12363 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12364 -(__v4sf) __A,
12365 (__v4sf) __B,
12366 (__mmask8) __U, __R);
12367}
12368
12369extern __inline __m128d
12370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12371_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12372 const int __R)
12373{
12374 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12375 -(__v2df) __A,
12376 -(__v2df) __B,
12377 (__mmask8) __U, __R);
12378}
12379
12380extern __inline __m128
12381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12382_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12383 const int __R)
12384{
12385 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12386 -(__v4sf) __A,
12387 -(__v4sf) __B,
12388 (__mmask8) __U, __R);
12389}
12390
12391extern __inline __m128d
12392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12393_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12394 const int __R)
12395{
12396 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12397 -(__v2df) __A,
12398 (__v2df) __B,
12399 (__mmask8) __U, __R);
12400}
12401
12402extern __inline __m128
12403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12404_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12405 const int __R)
12406{
12407 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12408 -(__v4sf) __A,
12409 (__v4sf) __B,
12410 (__mmask8) __U, __R);
12411}
12412
12413extern __inline __m128d
12414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12415_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12416 const int __R)
12417{
12418 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12419 -(__v2df) __A,
12420 -(__v2df) __B,
12421 (__mmask8) __U, __R);
12422}
12423
12424extern __inline __m128
12425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12426_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12427 const int __R)
12428{
12429 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12430 -(__v4sf) __A,
12431 -(__v4sf) __B,
12432 (__mmask8) __U, __R);
12433}
12434#else
/* Macro forms of the masked scalar fmadd rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is enclosed in
   parentheses so the result cast applies to the whole builtin call even
   when the invocation is followed by a postfix operator.  */
#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R))

#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R))

#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R))

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R))
12452
/* Macro forms of the masked scalar fmsub rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  The mask and maskz variants negate C and
   call the vfmadd builtins; the mask3 variants call the vfmsub builtins
   directly.  Each expansion is enclosed in parentheses so the result
   cast applies to the whole builtin call even when the invocation is
   followed by a postfix operator.  */
#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R))

#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R))

#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R))

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R))
12470
/* Macro forms of the masked scalar fnmadd rounding intrinsics, used
   when __OPTIMIZE__ is not defined.  All variants negate B and call the
   vfmadd builtins.  Each expansion is enclosed in parentheses so the
   result cast applies to the whole builtin call even when the
   invocation is followed by a postfix operator.  */
#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R))

#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R))
12488
/* Macro forms of the masked scalar fnmsub rounding intrinsics, used
   when __OPTIMIZE__ is not defined.  The mask and maskz variants negate
   both B and C and call the vfmadd builtins; the mask3 variants negate
   only B and call the vfmsub builtins.  Each expansion is enclosed in
   parentheses so the result cast applies to the whole builtin call even
   when the invocation is followed by a postfix operator.  */
#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R))

#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R))

#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R))

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R))
12506#endif
12507
756c5857
AI
12508#ifdef __OPTIMIZE__
12509extern __inline int
12510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12511_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
12512{
12513 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
12514}
12515
12516extern __inline int
12517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12518_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
12519{
12520 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
12521}
12522#else
/* Macro forms used when __OPTIMIZE__ is not defined; all four arguments
   are handed to the corresponding vcomis builtin unchanged.  */
#define _mm_comi_round_ss(A, B, C, D) \
  __builtin_ia32_vcomiss (A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D) \
  __builtin_ia32_vcomisd (A, B, C, D)
12527#endif
12528
12529extern __inline __m512d
12530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12531_mm512_sqrt_pd (__m512d __A)
12532{
12533 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12534 (__v8df)
0b192937 12535 _mm512_undefined_pd (),
756c5857
AI
12536 (__mmask8) -1,
12537 _MM_FROUND_CUR_DIRECTION);
12538}
12539
12540extern __inline __m512d
12541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12542_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
12543{
12544 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12545 (__v8df) __W,
12546 (__mmask8) __U,
12547 _MM_FROUND_CUR_DIRECTION);
12548}
12549
12550extern __inline __m512d
12551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12552_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
12553{
12554 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12555 (__v8df)
12556 _mm512_setzero_pd (),
12557 (__mmask8) __U,
12558 _MM_FROUND_CUR_DIRECTION);
12559}
12560
12561extern __inline __m512
12562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12563_mm512_sqrt_ps (__m512 __A)
12564{
12565 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12566 (__v16sf)
0b192937 12567 _mm512_undefined_ps (),
756c5857
AI
12568 (__mmask16) -1,
12569 _MM_FROUND_CUR_DIRECTION);
12570}
12571
12572extern __inline __m512
12573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12574_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
12575{
12576 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12577 (__v16sf) __W,
12578 (__mmask16) __U,
12579 _MM_FROUND_CUR_DIRECTION);
12580}
12581
12582extern __inline __m512
12583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12584_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
12585{
12586 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12587 (__v16sf)
12588 _mm512_setzero_ps (),
12589 (__mmask16) __U,
12590 _MM_FROUND_CUR_DIRECTION);
12591}
12592
12593extern __inline __m512d
12594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595_mm512_add_pd (__m512d __A, __m512d __B)
12596{
2069d6fc 12597 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
12598}
12599
12600extern __inline __m512d
12601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12602_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12603{
12604 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12605 (__v8df) __B,
12606 (__v8df) __W,
12607 (__mmask8) __U,
12608 _MM_FROUND_CUR_DIRECTION);
12609}
12610
12611extern __inline __m512d
12612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12613_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
12614{
12615 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12616 (__v8df) __B,
12617 (__v8df)
12618 _mm512_setzero_pd (),
12619 (__mmask8) __U,
12620 _MM_FROUND_CUR_DIRECTION);
12621}
12622
12623extern __inline __m512
12624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12625_mm512_add_ps (__m512 __A, __m512 __B)
12626{
2069d6fc 12627 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
12628}
12629
12630extern __inline __m512
12631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12633{
12634 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12635 (__v16sf) __B,
12636 (__v16sf) __W,
12637 (__mmask16) __U,
12638 _MM_FROUND_CUR_DIRECTION);
12639}
12640
12641extern __inline __m512
12642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
12644{
12645 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12646 (__v16sf) __B,
12647 (__v16sf)
12648 _mm512_setzero_ps (),
12649 (__mmask16) __U,
12650 _MM_FROUND_CUR_DIRECTION);
12651}
12652
1853f5c7
SP
12653extern __inline __m128d
12654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12655_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12656{
12657 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12658 (__v2df) __B,
12659 (__v2df) __W,
12660 (__mmask8) __U,
12661 _MM_FROUND_CUR_DIRECTION);
12662}
12663
12664extern __inline __m128d
12665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12666_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
12667{
12668 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12669 (__v2df) __B,
12670 (__v2df)
12671 _mm_setzero_pd (),
12672 (__mmask8) __U,
12673 _MM_FROUND_CUR_DIRECTION);
12674}
12675
12676extern __inline __m128
12677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12678_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12679{
12680 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12681 (__v4sf) __B,
12682 (__v4sf) __W,
12683 (__mmask8) __U,
12684 _MM_FROUND_CUR_DIRECTION);
12685}
12686
12687extern __inline __m128
12688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12689_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
12690{
12691 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12692 (__v4sf) __B,
12693 (__v4sf)
12694 _mm_setzero_ps (),
12695 (__mmask8) __U,
12696 _MM_FROUND_CUR_DIRECTION);
12697}
12698
756c5857
AI
12699extern __inline __m512d
12700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12701_mm512_sub_pd (__m512d __A, __m512d __B)
12702{
2069d6fc 12703 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
12704}
12705
12706extern __inline __m512d
12707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12708_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12709{
12710 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12711 (__v8df) __B,
12712 (__v8df) __W,
12713 (__mmask8) __U,
12714 _MM_FROUND_CUR_DIRECTION);
12715}
12716
12717extern __inline __m512d
12718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12719_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
12720{
12721 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12722 (__v8df) __B,
12723 (__v8df)
12724 _mm512_setzero_pd (),
12725 (__mmask8) __U,
12726 _MM_FROUND_CUR_DIRECTION);
12727}
12728
12729extern __inline __m512
12730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12731_mm512_sub_ps (__m512 __A, __m512 __B)
12732{
2069d6fc 12733 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
12734}
12735
12736extern __inline __m512
12737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12738_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12739{
12740 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12741 (__v16sf) __B,
12742 (__v16sf) __W,
12743 (__mmask16) __U,
12744 _MM_FROUND_CUR_DIRECTION);
12745}
12746
12747extern __inline __m512
12748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12749_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
12750{
12751 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12752 (__v16sf) __B,
12753 (__v16sf)
12754 _mm512_setzero_ps (),
12755 (__mmask16) __U,
12756 _MM_FROUND_CUR_DIRECTION);
12757}
12758
1853f5c7
SP
12759extern __inline __m128d
12760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12761_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12762{
12763 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12764 (__v2df) __B,
12765 (__v2df) __W,
12766 (__mmask8) __U,
12767 _MM_FROUND_CUR_DIRECTION);
12768}
12769
12770extern __inline __m128d
12771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12772_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
12773{
12774 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12775 (__v2df) __B,
12776 (__v2df)
12777 _mm_setzero_pd (),
12778 (__mmask8) __U,
12779 _MM_FROUND_CUR_DIRECTION);
12780}
12781
12782extern __inline __m128
12783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12784_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12785{
12786 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12787 (__v4sf) __B,
12788 (__v4sf) __W,
12789 (__mmask8) __U,
12790 _MM_FROUND_CUR_DIRECTION);
12791}
12792
12793extern __inline __m128
12794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12795_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
12796{
12797 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12798 (__v4sf) __B,
12799 (__v4sf)
12800 _mm_setzero_ps (),
12801 (__mmask8) __U,
12802 _MM_FROUND_CUR_DIRECTION);
12803}
12804
756c5857
AI
12805extern __inline __m512d
12806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12807_mm512_mul_pd (__m512d __A, __m512d __B)
12808{
2069d6fc 12809 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
12810}
12811
12812extern __inline __m512d
12813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12814_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12815{
12816 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12817 (__v8df) __B,
12818 (__v8df) __W,
12819 (__mmask8) __U,
12820 _MM_FROUND_CUR_DIRECTION);
12821}
12822
12823extern __inline __m512d
12824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12825_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
12826{
12827 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12828 (__v8df) __B,
12829 (__v8df)
12830 _mm512_setzero_pd (),
12831 (__mmask8) __U,
12832 _MM_FROUND_CUR_DIRECTION);
12833}
12834
12835extern __inline __m512
12836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837_mm512_mul_ps (__m512 __A, __m512 __B)
12838{
2069d6fc 12839 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
12840}
12841
12842extern __inline __m512
12843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12844_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12845{
12846 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12847 (__v16sf) __B,
12848 (__v16sf) __W,
12849 (__mmask16) __U,
12850 _MM_FROUND_CUR_DIRECTION);
12851}
12852
12853extern __inline __m512
12854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12855_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
12856{
12857 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12858 (__v16sf) __B,
12859 (__v16sf)
12860 _mm512_setzero_ps (),
12861 (__mmask16) __U,
12862 _MM_FROUND_CUR_DIRECTION);
12863}
12864
f4ee3a9e
UB
12865extern __inline __m128d
12866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12867_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
12868 __m128d __B)
12869{
12870 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12871 (__v2df) __B,
12872 (__v2df) __W,
12873 (__mmask8) __U,
12874 _MM_FROUND_CUR_DIRECTION);
12875}
12876
12877extern __inline __m128d
12878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12879_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
12880{
12881 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12882 (__v2df) __B,
12883 (__v2df)
12884 _mm_setzero_pd (),
12885 (__mmask8) __U,
12886 _MM_FROUND_CUR_DIRECTION);
12887}
12888
12889extern __inline __m128
12890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12891_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
12892 __m128 __B)
12893{
12894 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12895 (__v4sf) __B,
12896 (__v4sf) __W,
12897 (__mmask8) __U,
12898 _MM_FROUND_CUR_DIRECTION);
12899}
12900
12901extern __inline __m128
12902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12903_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
12904{
12905 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12906 (__v4sf) __B,
12907 (__v4sf)
12908 _mm_setzero_ps (),
12909 (__mmask8) __U,
12910 _MM_FROUND_CUR_DIRECTION);
12911}
12912
756c5857
AI
12913extern __inline __m512d
12914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12915_mm512_div_pd (__m512d __M, __m512d __V)
12916{
2069d6fc 12917 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
12918}
12919
12920extern __inline __m512d
12921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12922_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
12923{
12924 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12925 (__v8df) __V,
12926 (__v8df) __W,
12927 (__mmask8) __U,
12928 _MM_FROUND_CUR_DIRECTION);
12929}
12930
12931extern __inline __m512d
12932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12933_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
12934{
12935 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12936 (__v8df) __V,
12937 (__v8df)
12938 _mm512_setzero_pd (),
12939 (__mmask8) __U,
12940 _MM_FROUND_CUR_DIRECTION);
12941}
12942
12943extern __inline __m512
12944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12945_mm512_div_ps (__m512 __A, __m512 __B)
12946{
2069d6fc 12947 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
12948}
12949
12950extern __inline __m512
12951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12952_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12953{
12954 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12955 (__v16sf) __B,
12956 (__v16sf) __W,
12957 (__mmask16) __U,
12958 _MM_FROUND_CUR_DIRECTION);
12959}
12960
12961extern __inline __m512
12962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12963_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
12964{
12965 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12966 (__v16sf) __B,
12967 (__v16sf)
12968 _mm512_setzero_ps (),
12969 (__mmask16) __U,
12970 _MM_FROUND_CUR_DIRECTION);
12971}
12972
f4ee3a9e
UB
12973extern __inline __m128d
12974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12975_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
12976 __m128d __B)
12977{
12978 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12979 (__v2df) __B,
12980 (__v2df) __W,
12981 (__mmask8) __U,
12982 _MM_FROUND_CUR_DIRECTION);
12983}
12984
12985extern __inline __m128d
12986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12987_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
12988{
12989 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12990 (__v2df) __B,
12991 (__v2df)
12992 _mm_setzero_pd (),
12993 (__mmask8) __U,
12994 _MM_FROUND_CUR_DIRECTION);
12995}
12996
12997extern __inline __m128
12998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12999_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
13000 __m128 __B)
13001{
13002 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
13003 (__v4sf) __B,
13004 (__v4sf) __W,
13005 (__mmask8) __U,
13006 _MM_FROUND_CUR_DIRECTION);
13007}
13008
13009extern __inline __m128
13010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13011_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
13012{
13013 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
13014 (__v4sf) __B,
13015 (__v4sf)
13016 _mm_setzero_ps (),
13017 (__mmask8) __U,
13018 _MM_FROUND_CUR_DIRECTION);
13019}
13020
756c5857
AI
13021extern __inline __m512d
13022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13023_mm512_max_pd (__m512d __A, __m512d __B)
13024{
13025 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13026 (__v8df) __B,
13027 (__v8df)
0b192937 13028 _mm512_undefined_pd (),
756c5857
AI
13029 (__mmask8) -1,
13030 _MM_FROUND_CUR_DIRECTION);
13031}
13032
13033extern __inline __m512d
13034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13035_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13036{
13037 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13038 (__v8df) __B,
13039 (__v8df) __W,
13040 (__mmask8) __U,
13041 _MM_FROUND_CUR_DIRECTION);
13042}
13043
13044extern __inline __m512d
13045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13046_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
13047{
13048 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13049 (__v8df) __B,
13050 (__v8df)
13051 _mm512_setzero_pd (),
13052 (__mmask8) __U,
13053 _MM_FROUND_CUR_DIRECTION);
13054}
13055
13056extern __inline __m512
13057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13058_mm512_max_ps (__m512 __A, __m512 __B)
13059{
13060 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13061 (__v16sf) __B,
13062 (__v16sf)
0b192937 13063 _mm512_undefined_ps (),
756c5857
AI
13064 (__mmask16) -1,
13065 _MM_FROUND_CUR_DIRECTION);
13066}
13067
13068extern __inline __m512
13069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13070_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13071{
13072 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13073 (__v16sf) __B,
13074 (__v16sf) __W,
13075 (__mmask16) __U,
13076 _MM_FROUND_CUR_DIRECTION);
13077}
13078
13079extern __inline __m512
13080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13081_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
13082{
13083 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13084 (__v16sf) __B,
13085 (__v16sf)
13086 _mm512_setzero_ps (),
13087 (__mmask16) __U,
13088 _MM_FROUND_CUR_DIRECTION);
13089}
13090
dc7401c0
SP
13091extern __inline __m128d
13092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13093_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13094{
13095 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
13096 (__v2df) __B,
13097 (__v2df) __W,
13098 (__mmask8) __U,
13099 _MM_FROUND_CUR_DIRECTION);
13100}
13101
13102extern __inline __m128d
13103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13104_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
13105{
13106 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
13107 (__v2df) __B,
13108 (__v2df)
13109 _mm_setzero_pd (),
13110 (__mmask8) __U,
13111 _MM_FROUND_CUR_DIRECTION);
13112}
13113
13114extern __inline __m128
13115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13116_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13117{
13118 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
13119 (__v4sf) __B,
13120 (__v4sf) __W,
13121 (__mmask8) __U,
13122 _MM_FROUND_CUR_DIRECTION);
13123}
13124
13125extern __inline __m128
13126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13127_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
13128{
13129 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
13130 (__v4sf) __B,
13131 (__v4sf)
13132 _mm_setzero_ps (),
13133 (__mmask8) __U,
13134 _MM_FROUND_CUR_DIRECTION);
13135}
13136
756c5857
AI
13137extern __inline __m512d
13138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13139_mm512_min_pd (__m512d __A, __m512d __B)
13140{
13141 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13142 (__v8df) __B,
13143 (__v8df)
0b192937 13144 _mm512_undefined_pd (),
756c5857
AI
13145 (__mmask8) -1,
13146 _MM_FROUND_CUR_DIRECTION);
13147}
13148
13149extern __inline __m512d
13150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13151_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13152{
13153 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13154 (__v8df) __B,
13155 (__v8df) __W,
13156 (__mmask8) __U,
13157 _MM_FROUND_CUR_DIRECTION);
13158}
13159
13160extern __inline __m512d
13161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13162_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
13163{
13164 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
13165 (__v8df) __B,
13166 (__v8df)
13167 _mm512_setzero_pd (),
13168 (__mmask8) __U,
13169 _MM_FROUND_CUR_DIRECTION);
13170}
13171
13172extern __inline __m512
13173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13174_mm512_min_ps (__m512 __A, __m512 __B)
13175{
13176 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13177 (__v16sf) __B,
13178 (__v16sf)
0b192937 13179 _mm512_undefined_ps (),
756c5857
AI
13180 (__mmask16) -1,
13181 _MM_FROUND_CUR_DIRECTION);
13182}
13183
13184extern __inline __m512
13185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13186_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13187{
13188 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13189 (__v16sf) __B,
13190 (__v16sf) __W,
13191 (__mmask16) __U,
13192 _MM_FROUND_CUR_DIRECTION);
13193}
13194
13195extern __inline __m512
13196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13197_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
13198{
13199 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
13200 (__v16sf) __B,
13201 (__v16sf)
13202 _mm512_setzero_ps (),
13203 (__mmask16) __U,
13204 _MM_FROUND_CUR_DIRECTION);
13205}
13206
dc7401c0
SP
13207extern __inline __m128d
13208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13209_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13210{
13211 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13212 (__v2df) __B,
13213 (__v2df) __W,
13214 (__mmask8) __U,
13215 _MM_FROUND_CUR_DIRECTION);
13216}
13217
13218extern __inline __m128d
13219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13220_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
13221{
13222 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13223 (__v2df) __B,
13224 (__v2df)
13225 _mm_setzero_pd (),
13226 (__mmask8) __U,
13227 _MM_FROUND_CUR_DIRECTION);
13228}
13229
13230extern __inline __m128
13231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13232_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13233{
13234 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13235 (__v4sf) __B,
13236 (__v4sf) __W,
13237 (__mmask8) __U,
13238 _MM_FROUND_CUR_DIRECTION);
13239}
13240
13241extern __inline __m128
13242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13243_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
13244{
13245 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13246 (__v4sf) __B,
13247 (__v4sf)
13248 _mm_setzero_ps (),
13249 (__mmask8) __U,
13250 _MM_FROUND_CUR_DIRECTION);
13251}
13252
756c5857
AI
13253extern __inline __m512d
13254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13255_mm512_scalef_pd (__m512d __A, __m512d __B)
13256{
13257 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13258 (__v8df) __B,
13259 (__v8df)
0b192937 13260 _mm512_undefined_pd (),
756c5857
AI
13261 (__mmask8) -1,
13262 _MM_FROUND_CUR_DIRECTION);
13263}
13264
13265extern __inline __m512d
13266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13267_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13268{
13269 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13270 (__v8df) __B,
13271 (__v8df) __W,
13272 (__mmask8) __U,
13273 _MM_FROUND_CUR_DIRECTION);
13274}
13275
13276extern __inline __m512d
13277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
13279{
13280 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13281 (__v8df) __B,
13282 (__v8df)
13283 _mm512_setzero_pd (),
13284 (__mmask8) __U,
13285 _MM_FROUND_CUR_DIRECTION);
13286}
13287
13288extern __inline __m512
13289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13290_mm512_scalef_ps (__m512 __A, __m512 __B)
13291{
13292 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13293 (__v16sf) __B,
13294 (__v16sf)
0b192937 13295 _mm512_undefined_ps (),
756c5857
AI
13296 (__mmask16) -1,
13297 _MM_FROUND_CUR_DIRECTION);
13298}
13299
13300extern __inline __m512
13301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13302_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13303{
13304 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13305 (__v16sf) __B,
13306 (__v16sf) __W,
13307 (__mmask16) __U,
13308 _MM_FROUND_CUR_DIRECTION);
13309}
13310
13311extern __inline __m512
13312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13313_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
13314{
13315 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13316 (__v16sf) __B,
13317 (__v16sf)
13318 _mm512_setzero_ps (),
13319 (__mmask16) __U,
13320 _MM_FROUND_CUR_DIRECTION);
13321}
13322
075691af
AI
13323extern __inline __m128d
13324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13325_mm_scalef_sd (__m128d __A, __m128d __B)
13326{
158061a6
OM
13327 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
13328 (__v2df) __B,
13329 (__v2df)
13330 _mm_setzero_pd (),
13331 (__mmask8) -1,
13332 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13333}
13334
13335extern __inline __m128
13336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13337_mm_scalef_ss (__m128 __A, __m128 __B)
13338{
158061a6
OM
13339 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
13340 (__v4sf) __B,
13341 (__v4sf)
13342 _mm_setzero_ps (),
13343 (__mmask8) -1,
13344 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13345}
13346
756c5857
AI
13347extern __inline __m512d
13348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13349_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13350{
13351 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13352 (__v8df) __B,
13353 (__v8df) __C,
13354 (__mmask8) -1,
13355 _MM_FROUND_CUR_DIRECTION);
13356}
13357
13358extern __inline __m512d
13359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13360_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13361{
13362 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13363 (__v8df) __B,
13364 (__v8df) __C,
13365 (__mmask8) __U,
13366 _MM_FROUND_CUR_DIRECTION);
13367}
13368
13369extern __inline __m512d
13370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13371_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13372{
13373 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
13374 (__v8df) __B,
13375 (__v8df) __C,
13376 (__mmask8) __U,
13377 _MM_FROUND_CUR_DIRECTION);
13378}
13379
13380extern __inline __m512d
13381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13382_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13383{
13384 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
13385 (__v8df) __B,
13386 (__v8df) __C,
13387 (__mmask8) __U,
13388 _MM_FROUND_CUR_DIRECTION);
13389}
13390
13391extern __inline __m512
13392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13393_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13394{
13395 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13396 (__v16sf) __B,
13397 (__v16sf) __C,
13398 (__mmask16) -1,
13399 _MM_FROUND_CUR_DIRECTION);
13400}
13401
13402extern __inline __m512
13403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13404_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13405{
13406 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13407 (__v16sf) __B,
13408 (__v16sf) __C,
13409 (__mmask16) __U,
13410 _MM_FROUND_CUR_DIRECTION);
13411}
13412
13413extern __inline __m512
13414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13415_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13416{
13417 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
13418 (__v16sf) __B,
13419 (__v16sf) __C,
13420 (__mmask16) __U,
13421 _MM_FROUND_CUR_DIRECTION);
13422}
13423
13424extern __inline __m512
13425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13426_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13427{
13428 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
13429 (__v16sf) __B,
13430 (__v16sf) __C,
13431 (__mmask16) __U,
13432 _MM_FROUND_CUR_DIRECTION);
13433}
13434
13435extern __inline __m512d
13436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13437_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13438{
fe7f972d 13439 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13440 (__v8df) __B,
fe7f972d 13441 (__v8df) __C,
756c5857
AI
13442 (__mmask8) -1,
13443 _MM_FROUND_CUR_DIRECTION);
13444}
13445
13446extern __inline __m512d
13447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13448_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13449{
fe7f972d 13450 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13451 (__v8df) __B,
fe7f972d 13452 (__v8df) __C,
756c5857
AI
13453 (__mmask8) __U,
13454 _MM_FROUND_CUR_DIRECTION);
13455}
13456
13457extern __inline __m512d
13458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13459_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13460{
13461 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
13462 (__v8df) __B,
13463 (__v8df) __C,
13464 (__mmask8) __U,
13465 _MM_FROUND_CUR_DIRECTION);
13466}
13467
13468extern __inline __m512d
13469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13471{
fe7f972d 13472 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 13473 (__v8df) __B,
fe7f972d 13474 (__v8df) __C,
756c5857
AI
13475 (__mmask8) __U,
13476 _MM_FROUND_CUR_DIRECTION);
13477}
13478
13479extern __inline __m512
13480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13481_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13482{
fe7f972d 13483 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13484 (__v16sf) __B,
fe7f972d 13485 (__v16sf) __C,
756c5857
AI
13486 (__mmask16) -1,
13487 _MM_FROUND_CUR_DIRECTION);
13488}
13489
13490extern __inline __m512
13491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13492_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13493{
fe7f972d 13494 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13495 (__v16sf) __B,
fe7f972d 13496 (__v16sf) __C,
756c5857
AI
13497 (__mmask16) __U,
13498 _MM_FROUND_CUR_DIRECTION);
13499}
13500
13501extern __inline __m512
13502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13503_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13504{
13505 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
13506 (__v16sf) __B,
13507 (__v16sf) __C,
13508 (__mmask16) __U,
13509 _MM_FROUND_CUR_DIRECTION);
13510}
13511
13512extern __inline __m512
13513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13514_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13515{
fe7f972d 13516 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 13517 (__v16sf) __B,
fe7f972d 13518 (__v16sf) __C,
756c5857
AI
13519 (__mmask16) __U,
13520 _MM_FROUND_CUR_DIRECTION);
13521}
13522
13523extern __inline __m512d
13524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13525_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
13526{
13527 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13528 (__v8df) __B,
13529 (__v8df) __C,
13530 (__mmask8) -1,
13531 _MM_FROUND_CUR_DIRECTION);
13532}
13533
13534extern __inline __m512d
13535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13536_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13537{
13538 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13539 (__v8df) __B,
13540 (__v8df) __C,
13541 (__mmask8) __U,
13542 _MM_FROUND_CUR_DIRECTION);
13543}
13544
13545extern __inline __m512d
13546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13547_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13548{
13549 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
13550 (__v8df) __B,
13551 (__v8df) __C,
13552 (__mmask8) __U,
13553 _MM_FROUND_CUR_DIRECTION);
13554}
13555
13556extern __inline __m512d
13557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13558_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13559{
13560 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13561 (__v8df) __B,
13562 (__v8df) __C,
13563 (__mmask8) __U,
13564 _MM_FROUND_CUR_DIRECTION);
13565}
13566
13567extern __inline __m512
13568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13569_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
13570{
13571 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13572 (__v16sf) __B,
13573 (__v16sf) __C,
13574 (__mmask16) -1,
13575 _MM_FROUND_CUR_DIRECTION);
13576}
13577
13578extern __inline __m512
13579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13580_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13581{
13582 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13583 (__v16sf) __B,
13584 (__v16sf) __C,
13585 (__mmask16) __U,
13586 _MM_FROUND_CUR_DIRECTION);
13587}
13588
13589extern __inline __m512
13590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13591_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13592{
13593 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
13594 (__v16sf) __B,
13595 (__v16sf) __C,
13596 (__mmask16) __U,
13597 _MM_FROUND_CUR_DIRECTION);
13598}
13599
13600extern __inline __m512
13601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13602_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13603{
13604 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13605 (__v16sf) __B,
13606 (__v16sf) __C,
13607 (__mmask16) __U,
13608 _MM_FROUND_CUR_DIRECTION);
13609}
13610
13611extern __inline __m512d
13612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13613_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
13614{
13615 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13616 (__v8df) __B,
13617 -(__v8df) __C,
13618 (__mmask8) -1,
13619 _MM_FROUND_CUR_DIRECTION);
13620}
13621
13622extern __inline __m512d
13623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13624_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13625{
13626 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13627 (__v8df) __B,
13628 -(__v8df) __C,
13629 (__mmask8) __U,
13630 _MM_FROUND_CUR_DIRECTION);
13631}
13632
13633extern __inline __m512d
13634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13635_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13636{
13637 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
13638 (__v8df) __B,
13639 (__v8df) __C,
13640 (__mmask8) __U,
13641 _MM_FROUND_CUR_DIRECTION);
13642}
13643
13644extern __inline __m512d
13645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13646_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13647{
13648 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13649 (__v8df) __B,
13650 -(__v8df) __C,
13651 (__mmask8) __U,
13652 _MM_FROUND_CUR_DIRECTION);
13653}
13654
13655extern __inline __m512
13656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13657_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
13658{
13659 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13660 (__v16sf) __B,
13661 -(__v16sf) __C,
13662 (__mmask16) -1,
13663 _MM_FROUND_CUR_DIRECTION);
13664}
13665
13666extern __inline __m512
13667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13668_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13669{
13670 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13671 (__v16sf) __B,
13672 -(__v16sf) __C,
13673 (__mmask16) __U,
13674 _MM_FROUND_CUR_DIRECTION);
13675}
13676
13677extern __inline __m512
13678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13679_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13680{
13681 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
13682 (__v16sf) __B,
13683 (__v16sf) __C,
13684 (__mmask16) __U,
13685 _MM_FROUND_CUR_DIRECTION);
13686}
13687
13688extern __inline __m512
13689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13690_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13691{
13692 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13693 (__v16sf) __B,
13694 -(__v16sf) __C,
13695 (__mmask16) __U,
13696 _MM_FROUND_CUR_DIRECTION);
13697}
13698
13699extern __inline __m512d
13700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13701_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13702{
5ca94977
L
13703 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13704 (__v8df) __B,
13705 (__v8df) __C,
13706 (__mmask8) -1,
13707 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13708}
13709
13710extern __inline __m512d
13711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13712_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13713{
13714 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13715 (__v8df) __B,
13716 (__v8df) __C,
13717 (__mmask8) __U,
13718 _MM_FROUND_CUR_DIRECTION);
13719}
13720
13721extern __inline __m512d
13722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13723_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13724{
5ca94977
L
13725 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
13726 (__v8df) __B,
13727 (__v8df) __C,
13728 (__mmask8) __U,
13729 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13730}
13731
13732extern __inline __m512d
13733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13734_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13735{
5ca94977
L
13736 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
13737 (__v8df) __B,
13738 (__v8df) __C,
13739 (__mmask8) __U,
13740 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13741}
13742
13743extern __inline __m512
13744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13745_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13746{
5ca94977
L
13747 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13748 (__v16sf) __B,
13749 (__v16sf) __C,
13750 (__mmask16) -1,
13751 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13752}
13753
13754extern __inline __m512
13755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13756_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13757{
13758 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13759 (__v16sf) __B,
13760 (__v16sf) __C,
13761 (__mmask16) __U,
13762 _MM_FROUND_CUR_DIRECTION);
13763}
13764
13765extern __inline __m512
13766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13767_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13768{
5ca94977
L
13769 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
13770 (__v16sf) __B,
13771 (__v16sf) __C,
13772 (__mmask16) __U,
13773 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13774}
13775
13776extern __inline __m512
13777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13778_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13779{
5ca94977
L
13780 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
13781 (__v16sf) __B,
13782 (__v16sf) __C,
13783 (__mmask16) __U,
13784 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13785}
13786
13787extern __inline __m512d
13788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13789_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13790{
38ef6fb1
L
13791 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13792 (__v8df) __B,
13793 (__v8df) __C,
13794 (__mmask8) -1,
13795 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13796}
13797
13798extern __inline __m512d
13799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13800_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13801{
13802 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13803 (__v8df) __B,
13804 (__v8df) __C,
13805 (__mmask8) __U,
13806 _MM_FROUND_CUR_DIRECTION);
13807}
13808
13809extern __inline __m512d
13810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13811_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13812{
13813 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
13814 (__v8df) __B,
13815 (__v8df) __C,
13816 (__mmask8) __U,
13817 _MM_FROUND_CUR_DIRECTION);
13818}
13819
13820extern __inline __m512d
13821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13822_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13823{
38ef6fb1
L
13824 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
13825 (__v8df) __B,
13826 (__v8df) __C,
13827 (__mmask8) __U,
13828 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13829}
13830
13831extern __inline __m512
13832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13833_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13834{
38ef6fb1
L
13835 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13836 (__v16sf) __B,
13837 (__v16sf) __C,
13838 (__mmask16) -1,
13839 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13840}
13841
13842extern __inline __m512
13843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13844_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13845{
13846 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13847 (__v16sf) __B,
13848 (__v16sf) __C,
13849 (__mmask16) __U,
13850 _MM_FROUND_CUR_DIRECTION);
13851}
13852
13853extern __inline __m512
13854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13855_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13856{
13857 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
13858 (__v16sf) __B,
13859 (__v16sf) __C,
13860 (__mmask16) __U,
13861 _MM_FROUND_CUR_DIRECTION);
13862}
13863
13864extern __inline __m512
13865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13866_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13867{
38ef6fb1
L
13868 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
13869 (__v16sf) __B,
13870 (__v16sf) __C,
13871 (__mmask16) __U,
13872 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13873}
13874
13875extern __inline __m256i
13876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13877_mm512_cvttpd_epi32 (__m512d __A)
13878{
13879 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13880 (__v8si)
0b192937 13881 _mm256_undefined_si256 (),
756c5857
AI
13882 (__mmask8) -1,
13883 _MM_FROUND_CUR_DIRECTION);
13884}
13885
13886extern __inline __m256i
13887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13888_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13889{
13890 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13891 (__v8si) __W,
13892 (__mmask8) __U,
13893 _MM_FROUND_CUR_DIRECTION);
13894}
13895
13896extern __inline __m256i
13897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13898_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
13899{
13900 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13901 (__v8si)
13902 _mm256_setzero_si256 (),
13903 (__mmask8) __U,
13904 _MM_FROUND_CUR_DIRECTION);
13905}
13906
13907extern __inline __m256i
13908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13909_mm512_cvttpd_epu32 (__m512d __A)
13910{
13911 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13912 (__v8si)
0b192937 13913 _mm256_undefined_si256 (),
756c5857
AI
13914 (__mmask8) -1,
13915 _MM_FROUND_CUR_DIRECTION);
13916}
13917
13918extern __inline __m256i
13919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13920_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13921{
13922 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13923 (__v8si) __W,
13924 (__mmask8) __U,
13925 _MM_FROUND_CUR_DIRECTION);
13926}
13927
13928extern __inline __m256i
13929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13930_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
13931{
13932 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13933 (__v8si)
13934 _mm256_setzero_si256 (),
13935 (__mmask8) __U,
13936 _MM_FROUND_CUR_DIRECTION);
13937}
13938
13939extern __inline __m256i
13940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13941_mm512_cvtpd_epi32 (__m512d __A)
13942{
13943 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13944 (__v8si)
0b192937 13945 _mm256_undefined_si256 (),
756c5857
AI
13946 (__mmask8) -1,
13947 _MM_FROUND_CUR_DIRECTION);
13948}
13949
13950extern __inline __m256i
13951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13952_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13953{
13954 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13955 (__v8si) __W,
13956 (__mmask8) __U,
13957 _MM_FROUND_CUR_DIRECTION);
13958}
13959
13960extern __inline __m256i
13961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13962_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
13963{
13964 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13965 (__v8si)
13966 _mm256_setzero_si256 (),
13967 (__mmask8) __U,
13968 _MM_FROUND_CUR_DIRECTION);
13969}
13970
13971extern __inline __m256i
13972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13973_mm512_cvtpd_epu32 (__m512d __A)
13974{
13975 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13976 (__v8si)
0b192937 13977 _mm256_undefined_si256 (),
756c5857
AI
13978 (__mmask8) -1,
13979 _MM_FROUND_CUR_DIRECTION);
13980}
13981
13982extern __inline __m256i
13983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13984_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13985{
13986 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13987 (__v8si) __W,
13988 (__mmask8) __U,
13989 _MM_FROUND_CUR_DIRECTION);
13990}
13991
13992extern __inline __m256i
13993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13994_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
13995{
13996 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13997 (__v8si)
13998 _mm256_setzero_si256 (),
13999 (__mmask8) __U,
14000 _MM_FROUND_CUR_DIRECTION);
14001}
14002
14003extern __inline __m512i
14004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14005_mm512_cvttps_epi32 (__m512 __A)
14006{
14007 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14008 (__v16si)
4271e5cb 14009 _mm512_undefined_epi32 (),
756c5857
AI
14010 (__mmask16) -1,
14011 _MM_FROUND_CUR_DIRECTION);
14012}
14013
14014extern __inline __m512i
14015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14016_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14017{
14018 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14019 (__v16si) __W,
14020 (__mmask16) __U,
14021 _MM_FROUND_CUR_DIRECTION);
14022}
14023
14024extern __inline __m512i
14025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14026_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
14027{
14028 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14029 (__v16si)
14030 _mm512_setzero_si512 (),
14031 (__mmask16) __U,
14032 _MM_FROUND_CUR_DIRECTION);
14033}
14034
14035extern __inline __m512i
14036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14037_mm512_cvttps_epu32 (__m512 __A)
14038{
14039 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14040 (__v16si)
4271e5cb 14041 _mm512_undefined_epi32 (),
756c5857
AI
14042 (__mmask16) -1,
14043 _MM_FROUND_CUR_DIRECTION);
14044}
14045
14046extern __inline __m512i
14047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14048_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14049{
14050 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14051 (__v16si) __W,
14052 (__mmask16) __U,
14053 _MM_FROUND_CUR_DIRECTION);
14054}
14055
14056extern __inline __m512i
14057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14058_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
14059{
14060 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14061 (__v16si)
14062 _mm512_setzero_si512 (),
14063 (__mmask16) __U,
14064 _MM_FROUND_CUR_DIRECTION);
14065}
14066
14067extern __inline __m512i
14068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14069_mm512_cvtps_epi32 (__m512 __A)
14070{
14071 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14072 (__v16si)
4271e5cb 14073 _mm512_undefined_epi32 (),
756c5857
AI
14074 (__mmask16) -1,
14075 _MM_FROUND_CUR_DIRECTION);
14076}
14077
14078extern __inline __m512i
14079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14080_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14081{
14082 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14083 (__v16si) __W,
14084 (__mmask16) __U,
14085 _MM_FROUND_CUR_DIRECTION);
14086}
14087
14088extern __inline __m512i
14089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14090_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
14091{
14092 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14093 (__v16si)
14094 _mm512_setzero_si512 (),
14095 (__mmask16) __U,
14096 _MM_FROUND_CUR_DIRECTION);
14097}
14098
14099extern __inline __m512i
14100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14101_mm512_cvtps_epu32 (__m512 __A)
14102{
14103 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14104 (__v16si)
4271e5cb 14105 _mm512_undefined_epi32 (),
756c5857
AI
14106 (__mmask16) -1,
14107 _MM_FROUND_CUR_DIRECTION);
14108}
14109
14110extern __inline __m512i
14111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14112_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14113{
14114 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14115 (__v16si) __W,
14116 (__mmask16) __U,
14117 _MM_FROUND_CUR_DIRECTION);
14118}
14119
14120extern __inline __m512i
14121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14122_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
14123{
14124 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14125 (__v16si)
14126 _mm512_setzero_si512 (),
14127 (__mmask16) __U,
14128 _MM_FROUND_CUR_DIRECTION);
14129}
14130
dcb2c527
JJ
14131extern __inline double
14132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14133_mm512_cvtsd_f64 (__m512d __A)
14134{
14135 return __A[0];
14136}
14137
14138extern __inline float
14139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14140_mm512_cvtss_f32 (__m512 __A)
14141{
14142 return __A[0];
14143}
14144
756c5857
AI
14145#ifdef __x86_64__
14146extern __inline __m128
14147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14148_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
14149{
14150 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
14151 _MM_FROUND_CUR_DIRECTION);
14152}
14153
14154extern __inline __m128d
14155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14156_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
14157{
14158 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
14159 _MM_FROUND_CUR_DIRECTION);
14160}
14161#endif
14162
14163extern __inline __m128
14164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14165_mm_cvtu32_ss (__m128 __A, unsigned __B)
14166{
14167 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
14168 _MM_FROUND_CUR_DIRECTION);
14169}
14170
14171extern __inline __m512
14172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14173_mm512_cvtepi32_ps (__m512i __A)
14174{
14175 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14176 (__v16sf)
0b192937 14177 _mm512_undefined_ps (),
756c5857
AI
14178 (__mmask16) -1,
14179 _MM_FROUND_CUR_DIRECTION);
14180}
14181
14182extern __inline __m512
14183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14184_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14185{
14186 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14187 (__v16sf) __W,
14188 (__mmask16) __U,
14189 _MM_FROUND_CUR_DIRECTION);
14190}
14191
14192extern __inline __m512
14193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14194_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
14195{
14196 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14197 (__v16sf)
14198 _mm512_setzero_ps (),
14199 (__mmask16) __U,
14200 _MM_FROUND_CUR_DIRECTION);
14201}
14202
14203extern __inline __m512
14204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14205_mm512_cvtepu32_ps (__m512i __A)
14206{
14207 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14208 (__v16sf)
0b192937 14209 _mm512_undefined_ps (),
756c5857
AI
14210 (__mmask16) -1,
14211 _MM_FROUND_CUR_DIRECTION);
14212}
14213
14214extern __inline __m512
14215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14216_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14217{
14218 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14219 (__v16sf) __W,
14220 (__mmask16) __U,
14221 _MM_FROUND_CUR_DIRECTION);
14222}
14223
14224extern __inline __m512
14225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14226_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
14227{
14228 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14229 (__v16sf)
14230 _mm512_setzero_ps (),
14231 (__mmask16) __U,
14232 _MM_FROUND_CUR_DIRECTION);
14233}
14234
14235#ifdef __OPTIMIZE__
14236extern __inline __m512d
14237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14238_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
756c5857 14239{
040d2bba
WX
14240 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
14241 (__v8df) __B,
14242 (__v8di) __C,
756c5857 14243 __imm,
040d2bba 14244 (__mmask8) -1,
756c5857
AI
14245 _MM_FROUND_CUR_DIRECTION);
14246}
14247
14248extern __inline __m512d
14249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14250_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
14251 __m512i __C, const int __imm)
756c5857
AI
14252{
14253 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
14254 (__v8df) __B,
14255 (__v8di) __C,
756c5857
AI
14256 __imm,
14257 (__mmask8) __U,
14258 _MM_FROUND_CUR_DIRECTION);
14259}
14260
14261extern __inline __m512d
14262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14263_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
14264 __m512i __C, const int __imm)
756c5857
AI
14265{
14266 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
14267 (__v8df) __B,
14268 (__v8di) __C,
756c5857
AI
14269 __imm,
14270 (__mmask8) __U,
14271 _MM_FROUND_CUR_DIRECTION);
14272}
14273
14274extern __inline __m512
14275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14276_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
756c5857 14277{
040d2bba
WX
14278 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
14279 (__v16sf) __B,
14280 (__v16si) __C,
756c5857 14281 __imm,
040d2bba 14282 (__mmask16) -1,
756c5857
AI
14283 _MM_FROUND_CUR_DIRECTION);
14284}
14285
14286extern __inline __m512
14287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14288_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
14289 __m512i __C, const int __imm)
756c5857
AI
14290{
14291 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
14292 (__v16sf) __B,
14293 (__v16si) __C,
756c5857
AI
14294 __imm,
14295 (__mmask16) __U,
14296 _MM_FROUND_CUR_DIRECTION);
14297}
14298
14299extern __inline __m512
14300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14301_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
14302 __m512i __C, const int __imm)
756c5857
AI
14303{
14304 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
14305 (__v16sf) __B,
14306 (__v16si) __C,
756c5857
AI
14307 __imm,
14308 (__mmask16) __U,
14309 _MM_FROUND_CUR_DIRECTION);
14310}
14311
14312extern __inline __m128d
14313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14314_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
756c5857 14315{
040d2bba
WX
14316 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
14317 (__v2df) __B,
14318 (__v2di) __C, __imm,
14319 (__mmask8) -1,
756c5857
AI
14320 _MM_FROUND_CUR_DIRECTION);
14321}
14322
14323extern __inline __m128d
14324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14325_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
14326 __m128i __C, const int __imm)
756c5857
AI
14327{
14328 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
14329 (__v2df) __B,
14330 (__v2di) __C, __imm,
756c5857
AI
14331 (__mmask8) __U,
14332 _MM_FROUND_CUR_DIRECTION);
14333}
14334
14335extern __inline __m128d
14336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14337_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
14338 __m128i __C, const int __imm)
756c5857
AI
14339{
14340 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
14341 (__v2df) __B,
14342 (__v2di) __C,
756c5857
AI
14343 __imm,
14344 (__mmask8) __U,
14345 _MM_FROUND_CUR_DIRECTION);
14346}
14347
14348extern __inline __m128
14349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14350_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
756c5857 14351{
040d2bba
WX
14352 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
14353 (__v4sf) __B,
14354 (__v4si) __C, __imm,
14355 (__mmask8) -1,
756c5857
AI
14356 _MM_FROUND_CUR_DIRECTION);
14357}
14358
14359extern __inline __m128
14360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14361_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
14362 __m128i __C, const int __imm)
756c5857
AI
14363{
14364 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
14365 (__v4sf) __B,
14366 (__v4si) __C, __imm,
756c5857
AI
14367 (__mmask8) __U,
14368 _MM_FROUND_CUR_DIRECTION);
14369}
14370
14371extern __inline __m128
14372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14373_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
14374 __m128i __C, const int __imm)
756c5857
AI
14375{
14376 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
14377 (__v4sf) __B,
14378 (__v4si) __C, __imm,
756c5857
AI
14379 (__mmask8) __U,
14380 _MM_FROUND_CUR_DIRECTION);
14381}
14382#else
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857 14387
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14392
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_maskz with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14397
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_mask with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857 14402
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_mask with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
14407
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmps512_maskz with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
14412
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_mask with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857 14417
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_mask with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14422
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmsd_maskz with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14427
040d2bba
WX
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_mask with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
756c5857 14432
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_mask with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14437
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmss_maskz with mask U and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
14442#endif
14443
14444#ifdef __x86_64__
14445extern __inline unsigned long long
14446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14447_mm_cvtss_u64 (__m128 __A)
14448{
14449 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
14450 __A,
14451 _MM_FROUND_CUR_DIRECTION);
14452}
14453
14454extern __inline unsigned long long
14455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14456_mm_cvttss_u64 (__m128 __A)
14457{
14458 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
14459 __A,
14460 _MM_FROUND_CUR_DIRECTION);
14461}
14462
14463extern __inline long long
14464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14465_mm_cvttss_i64 (__m128 __A)
14466{
14467 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
14468 _MM_FROUND_CUR_DIRECTION);
14469}
14470#endif /* __x86_64__ */
14471
93103603
SP
14472extern __inline int
14473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14474_mm512_cvtsi512_si32 (__m512i __A)
14475{
14476 __v16si __B = (__v16si) __A;
14477 return __B[0];
14478}
14479
756c5857
AI
14480extern __inline unsigned
14481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14482_mm_cvtss_u32 (__m128 __A)
14483{
14484 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
14485 _MM_FROUND_CUR_DIRECTION);
14486}
14487
14488extern __inline unsigned
14489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14490_mm_cvttss_u32 (__m128 __A)
14491{
14492 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
14493 _MM_FROUND_CUR_DIRECTION);
14494}
14495
14496extern __inline int
14497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14498_mm_cvttss_i32 (__m128 __A)
14499{
14500 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
14501 _MM_FROUND_CUR_DIRECTION);
14502}
14503
93103603
SP
14504extern __inline int
14505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14506_mm_cvtsd_i32 (__m128d __A)
14507{
14508 return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
14509}
14510
14511extern __inline int
14512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14513_mm_cvtss_i32 (__m128 __A)
14514{
14515 return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
14516}
14517
14518extern __inline __m128d
14519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14520_mm_cvti32_sd (__m128d __A, int __B)
14521{
14522 return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
14523}
14524
14525extern __inline __m128
14526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14527_mm_cvti32_ss (__m128 __A, int __B)
14528{
14529 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
14530}
14531
756c5857
AI
14532#ifdef __x86_64__
14533extern __inline unsigned long long
14534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14535_mm_cvtsd_u64 (__m128d __A)
14536{
14537 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
14538 __A,
14539 _MM_FROUND_CUR_DIRECTION);
14540}
14541
14542extern __inline unsigned long long
14543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14544_mm_cvttsd_u64 (__m128d __A)
14545{
14546 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
14547 __A,
14548 _MM_FROUND_CUR_DIRECTION);
14549}
14550
14551extern __inline long long
14552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14553_mm_cvttsd_i64 (__m128d __A)
14554{
14555 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
14556 _MM_FROUND_CUR_DIRECTION);
14557}
93103603
SP
14558
14559extern __inline long long
14560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14561_mm_cvtsd_i64 (__m128d __A)
14562{
14563 return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
14564}
14565
14566extern __inline long long
14567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14568_mm_cvtss_i64 (__m128 __A)
14569{
14570 return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
14571}
14572
14573extern __inline __m128d
14574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14575_mm_cvti64_sd (__m128d __A, long long __B)
14576{
14577 return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
14578}
14579
14580extern __inline __m128
14581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14582_mm_cvti64_ss (__m128 __A, long long __B)
14583{
14584 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
14585}
756c5857
AI
14586#endif /* __x86_64__ */
14587
14588extern __inline unsigned
14589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14590_mm_cvtsd_u32 (__m128d __A)
14591{
14592 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
14593 _MM_FROUND_CUR_DIRECTION);
14594}
14595
14596extern __inline unsigned
14597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14598_mm_cvttsd_u32 (__m128d __A)
14599{
14600 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
14601 _MM_FROUND_CUR_DIRECTION);
14602}
14603
14604extern __inline int
14605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14606_mm_cvttsd_i32 (__m128d __A)
14607{
14608 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
14609 _MM_FROUND_CUR_DIRECTION);
14610}
14611
14612extern __inline __m512d
14613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14614_mm512_cvtps_pd (__m256 __A)
14615{
14616 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14617 (__v8df)
0b192937 14618 _mm512_undefined_pd (),
756c5857
AI
14619 (__mmask8) -1,
14620 _MM_FROUND_CUR_DIRECTION);
14621}
14622
14623extern __inline __m512d
14624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14625_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
14626{
14627 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14628 (__v8df) __W,
14629 (__mmask8) __U,
14630 _MM_FROUND_CUR_DIRECTION);
14631}
14632
14633extern __inline __m512d
14634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14635_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
14636{
14637 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14638 (__v8df)
14639 _mm512_setzero_pd (),
14640 (__mmask8) __U,
14641 _MM_FROUND_CUR_DIRECTION);
14642}
14643
14644extern __inline __m512
14645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14646_mm512_cvtph_ps (__m256i __A)
14647{
14648 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14649 (__v16sf)
0b192937 14650 _mm512_undefined_ps (),
756c5857
AI
14651 (__mmask16) -1,
14652 _MM_FROUND_CUR_DIRECTION);
14653}
14654
14655extern __inline __m512
14656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14657_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
14658{
14659 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14660 (__v16sf) __W,
14661 (__mmask16) __U,
14662 _MM_FROUND_CUR_DIRECTION);
14663}
14664
14665extern __inline __m512
14666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14667_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
14668{
14669 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14670 (__v16sf)
14671 _mm512_setzero_ps (),
14672 (__mmask16) __U,
14673 _MM_FROUND_CUR_DIRECTION);
14674}
14675
14676extern __inline __m256
14677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14678_mm512_cvtpd_ps (__m512d __A)
14679{
14680 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14681 (__v8sf)
0b192937 14682 _mm256_undefined_ps (),
756c5857
AI
14683 (__mmask8) -1,
14684 _MM_FROUND_CUR_DIRECTION);
14685}
14686
14687extern __inline __m256
14688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14689_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
14690{
14691 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14692 (__v8sf) __W,
14693 (__mmask8) __U,
14694 _MM_FROUND_CUR_DIRECTION);
14695}
14696
14697extern __inline __m256
14698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14699_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
14700{
14701 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14702 (__v8sf)
14703 _mm256_setzero_ps (),
14704 (__mmask8) __U,
14705 _MM_FROUND_CUR_DIRECTION);
14706}
14707
14708#ifdef __OPTIMIZE__
14709extern __inline __m512
14710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14711_mm512_getexp_ps (__m512 __A)
14712{
14713 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14714 (__v16sf)
0b192937 14715 _mm512_undefined_ps (),
756c5857
AI
14716 (__mmask16) -1,
14717 _MM_FROUND_CUR_DIRECTION);
14718}
14719
14720extern __inline __m512
14721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14722_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
14723{
14724 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14725 (__v16sf) __W,
14726 (__mmask16) __U,
14727 _MM_FROUND_CUR_DIRECTION);
14728}
14729
14730extern __inline __m512
14731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14732_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
14733{
14734 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14735 (__v16sf)
14736 _mm512_setzero_ps (),
14737 (__mmask16) __U,
14738 _MM_FROUND_CUR_DIRECTION);
14739}
14740
14741extern __inline __m512d
14742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14743_mm512_getexp_pd (__m512d __A)
14744{
14745 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14746 (__v8df)
0b192937 14747 _mm512_undefined_pd (),
756c5857
AI
14748 (__mmask8) -1,
14749 _MM_FROUND_CUR_DIRECTION);
14750}
14751
14752extern __inline __m512d
14753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14754_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
14755{
14756 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14757 (__v8df) __W,
14758 (__mmask8) __U,
14759 _MM_FROUND_CUR_DIRECTION);
14760}
14761
14762extern __inline __m512d
14763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14764_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
14765{
14766 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14767 (__v8df)
14768 _mm512_setzero_pd (),
14769 (__mmask8) __U,
14770 _MM_FROUND_CUR_DIRECTION);
14771}
14772
075691af
AI
14773extern __inline __m128
14774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14775_mm_getexp_ss (__m128 __A, __m128 __B)
14776{
14777 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
14778 (__v4sf) __B,
14779 _MM_FROUND_CUR_DIRECTION);
14780}
14781
68d872d7
SP
14782extern __inline __m128
14783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14784_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
14785{
14786 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14787 (__v4sf) __B,
14788 (__v4sf) __W,
14789 (__mmask8) __U,
14790 _MM_FROUND_CUR_DIRECTION);
14791}
14792
14793extern __inline __m128
14794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14795_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
14796{
14797 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14798 (__v4sf) __B,
14799 (__v4sf)
14800 _mm_setzero_ps (),
14801 (__mmask8) __U,
14802 _MM_FROUND_CUR_DIRECTION);
14803}
14804
075691af
AI
14805extern __inline __m128d
14806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14807_mm_getexp_sd (__m128d __A, __m128d __B)
14808{
14809 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
14810 (__v2df) __B,
14811 _MM_FROUND_CUR_DIRECTION);
14812}
14813
68d872d7
SP
14814extern __inline __m128d
14815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14816_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
14817{
14818 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14819 (__v2df) __B,
14820 (__v2df) __W,
14821 (__mmask8) __U,
14822 _MM_FROUND_CUR_DIRECTION);
14823}
14824
14825extern __inline __m128d
14826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14827_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
14828{
14829 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14830 (__v2df) __B,
14831 (__v2df)
14832 _mm_setzero_pd (),
14833 (__mmask8) __U,
14834 _MM_FROUND_CUR_DIRECTION);
14835}
14836
756c5857
AI
14837extern __inline __m512d
14838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14839_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
14840 _MM_MANTISSA_SIGN_ENUM __C)
14841{
14842 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14843 (__C << 2) | __B,
0b192937 14844 _mm512_undefined_pd (),
756c5857
AI
14845 (__mmask8) -1,
14846 _MM_FROUND_CUR_DIRECTION);
14847}
14848
14849extern __inline __m512d
14850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14851_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
14852 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14853{
14854 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14855 (__C << 2) | __B,
14856 (__v8df) __W, __U,
14857 _MM_FROUND_CUR_DIRECTION);
14858}
14859
14860extern __inline __m512d
14861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14862_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
14863 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14864{
14865 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14866 (__C << 2) | __B,
14867 (__v8df)
14868 _mm512_setzero_pd (),
14869 __U,
14870 _MM_FROUND_CUR_DIRECTION);
14871}
14872
14873extern __inline __m512
14874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14875_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
14876 _MM_MANTISSA_SIGN_ENUM __C)
14877{
14878 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14879 (__C << 2) | __B,
0b192937 14880 _mm512_undefined_ps (),
756c5857
AI
14881 (__mmask16) -1,
14882 _MM_FROUND_CUR_DIRECTION);
14883}
14884
14885extern __inline __m512
14886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14887_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
14888 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14889{
14890 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14891 (__C << 2) | __B,
14892 (__v16sf) __W, __U,
14893 _MM_FROUND_CUR_DIRECTION);
14894}
14895
14896extern __inline __m512
14897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14898_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
14899 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14900{
14901 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14902 (__C << 2) | __B,
14903 (__v16sf)
14904 _mm512_setzero_ps (),
14905 __U,
14906 _MM_FROUND_CUR_DIRECTION);
14907}
14908
075691af
AI
14909extern __inline __m128d
14910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14911_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
14912 _MM_MANTISSA_SIGN_ENUM __D)
14913{
14914 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
14915 (__v2df) __B,
14916 (__D << 2) | __C,
14917 _MM_FROUND_CUR_DIRECTION);
14918}
14919
68d872d7
SP
14920extern __inline __m128d
14921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14922_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
14923 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14924{
14925 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14926 (__v2df) __B,
14927 (__D << 2) | __C,
14928 (__v2df) __W,
14929 __U,
14930 _MM_FROUND_CUR_DIRECTION);
14931}
14932
14933extern __inline __m128d
14934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14935_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
14936 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14937{
14938 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14939 (__v2df) __B,
14940 (__D << 2) | __C,
14941 (__v2df)
14942 _mm_setzero_pd(),
14943 __U,
14944 _MM_FROUND_CUR_DIRECTION);
14945}
14946
075691af
AI
14947extern __inline __m128
14948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14949_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
14950 _MM_MANTISSA_SIGN_ENUM __D)
14951{
14952 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
14953 (__v4sf) __B,
14954 (__D << 2) | __C,
14955 _MM_FROUND_CUR_DIRECTION);
14956}
14957
68d872d7
SP
14958extern __inline __m128
14959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14960_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
14961 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14962{
14963 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14964 (__v4sf) __B,
14965 (__D << 2) | __C,
14966 (__v4sf) __W,
14967 __U,
14968 _MM_FROUND_CUR_DIRECTION);
14969}
14970
14971extern __inline __m128
14972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14973_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
14974 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14975{
14976 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14977 (__v4sf) __B,
14978 (__D << 2) | __C,
14979 (__v4sf)
14980 _mm_setzero_ps(),
14981 __U,
14982 _MM_FROUND_CUR_DIRECTION);
14983}
14984
756c5857
AI
14985#else
/* Macro forms of the 512-bit double-precision getmant intrinsics.  Each
   expands to __builtin_ia32_getmantpd512_mask with the immediate
   ((C) << 2) | (B) and _MM_FROUND_CUR_DIRECTION.  They differ only in
   the third operand (undefined, W or zero) and in the mask (-1 or U).  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) _mm512_undefined_pd (), \
                                               (__mmask8) -1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) (__m512d) (W), \
                                               (__mmask8) (U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
                                               (int) (((C) << 2) | (B)), \
                                               (__v8df) _mm512_setzero_pd (), \
                                               (__mmask8) (U), \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit single-precision getmant intrinsics.  Each
   expands to __builtin_ia32_getmantps512_mask with the immediate
   ((C) << 2) | (B) and _MM_FROUND_CUR_DIRECTION.  They differ only in
   the third operand (undefined, W or zero) and in the mask (-1 or U).  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) (__m512) (W), \
                                              (__mmask16) (U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
                                              (int) (((C) << 2) | (B)), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar double-precision getmant intrinsics.  The
   unmasked form expands to __builtin_ia32_getmantsd_round; the masked
   forms expand to __builtin_ia32_getmantsd_mask_round with W or a zero
   vector as the fourth operand.  The immediate is ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
                                             (__v2df) (__m128d) (Y), \
                                             (int) (((D) << 2) | (C)), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) (__m128d) (X), \
                                                  (__v2df) (__m128d) (Y), \
                                                  (int) (((D) << 2) | (C)), \
                                                  (__v2df) (__m128d) (W), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) (__m128d) (X), \
                                                  (__v2df) (__m128d) (Y), \
                                                  (int) (((D) << 2) | (C)), \
                                                  (__v2df) _mm_setzero_pd (), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
15047
075691af
AI
/* Macro forms of the scalar single-precision getmant intrinsics.  The
   unmasked form expands to __builtin_ia32_getmantss_round; the masked
   forms expand to __builtin_ia32_getmantss_mask_round with W or a zero
   vector as the fourth operand.  The immediate is ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
                                            (__v4sf) (__m128) (Y), \
                                            (int) (((D) << 2) | (C)), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) (__m128) (X), \
                                                 (__v4sf) (__m128) (Y), \
                                                 (int) (((D) << 2) | (C)), \
                                                 (__v4sf) (__m128) (W), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) (__m128) (X), \
                                                 (__v4sf) (__m128) (Y), \
                                                 (int) (((D) << 2) | (C)), \
                                                 (__v4sf) _mm_setzero_ps (), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
15069
/* Macro form: expands to __builtin_ia32_getexpss128_round on A and B
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), \
                                              _MM_FROUND_CUR_DIRECTION))
15073
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getexpss_mask_round on A and B
   with W as the third operand, U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The whole expansion and every argument are
   parenthesized, and the arguments are cast to the operand types, so the
   macro composes safely with surrounding operators (e.g. a subscript
   applied to the result) and accepts the same argument forms as
   _mm_getexp_ss.  */
#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) (__m128) (W), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
15077
/* Macro form: expands to __builtin_ia32_getexpss_mask_round on A and B
   with a zero vector as the third operand, U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The whole expansion and every argument are
   parenthesized, and the arguments are cast to the operand types, so the
   macro composes safely with surrounding operators.  */
#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) _mm_setzero_ps (), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
15081
/* Macro form: expands to __builtin_ia32_getexpsd128_round on A and B
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), \
                                               _MM_FROUND_CUR_DIRECTION))
15085
68d872d7
SP
/* Macro form: expands to __builtin_ia32_getexpsd_mask_round on A and B
   with W as the third operand, U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The whole expansion and every argument are
   parenthesized, and the arguments are cast to the operand types, so the
   macro composes safely with surrounding operators (e.g. a subscript
   applied to the result) and accepts the same argument forms as
   _mm_getexp_sd.  */
#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) (__m128d) (W), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
15089
/* Macro form: expands to __builtin_ia32_getexpsd_mask_round on A and B
   with a zero vector as the third operand, U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The whole expansion and every argument are
   parenthesized, and the arguments are cast to the operand types, so the
   macro composes safely with surrounding operators.  */
#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) _mm_setzero_pd (), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
15093
756c5857
AI
/* Macro forms of the 512-bit single-precision getexp intrinsics.  Each
   expands to __builtin_ia32_getexpps512_mask on A with
   _MM_FROUND_CUR_DIRECTION; they differ only in the second operand
   (undefined, W or zero) and in the mask (-1 or U).  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) (__m512) (W), \
                                             (__mmask16) (U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (U), \
                                             _MM_FROUND_CUR_DIRECTION))
15105
/* Macro forms of the 512-bit double-precision getexp intrinsics.  Each
   expands to __builtin_ia32_getexppd512_mask on A with
   _MM_FROUND_CUR_DIRECTION; they differ only in the second operand
   (undefined, W or zero) and in the mask (-1 or U).  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) (__m512d) (W), \
                                              (__mmask8) (U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8) (U), \
                                              _MM_FROUND_CUR_DIRECTION))
15117#endif
15118
15119#ifdef __OPTIMIZE__
15120extern __inline __m512
15121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15122_mm512_roundscale_ps (__m512 __A, const int __imm)
15123{
15124 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
15125 (__v16sf)
15126 _mm512_undefined_ps (),
15127 -1,
756c5857
AI
15128 _MM_FROUND_CUR_DIRECTION);
15129}
15130
15131extern __inline __m512
15132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15133_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
15134 const int __imm)
15135{
15136 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
15137 (__v16sf) __A,
15138 (__mmask16) __B,
15139 _MM_FROUND_CUR_DIRECTION);
15140}
15141
15142extern __inline __m512
15143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15144_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
15145{
15146 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
15147 __imm,
15148 (__v16sf)
15149 _mm512_setzero_ps (),
15150 (__mmask16) __A,
15151 _MM_FROUND_CUR_DIRECTION);
15152}
15153
15154extern __inline __m512d
15155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15156_mm512_roundscale_pd (__m512d __A, const int __imm)
15157{
15158 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
15159 (__v8df)
15160 _mm512_undefined_pd (),
15161 -1,
756c5857
AI
15162 _MM_FROUND_CUR_DIRECTION);
15163}
15164
15165extern __inline __m512d
15166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15167_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
15168 const int __imm)
15169{
15170 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
15171 (__v8df) __A,
15172 (__mmask8) __B,
15173 _MM_FROUND_CUR_DIRECTION);
15174}
15175
15176extern __inline __m512d
15177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15178_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
15179{
15180 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
15181 __imm,
15182 (__v8df)
15183 _mm512_setzero_pd (),
15184 (__mmask8) __A,
15185 _MM_FROUND_CUR_DIRECTION);
15186}
15187
075691af
AI
15188extern __inline __m128
15189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15190_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
15191{
a7c4d6d1
HL
15192 return (__m128)
15193 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
15194 (__v4sf) __B, __imm,
15195 (__v4sf)
15196 _mm_setzero_ps (),
15197 (__mmask8) -1,
15198 _MM_FROUND_CUR_DIRECTION);
15199}
15200
15201
15202extern __inline __m128
15203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15204_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
15205 const int __imm)
15206{
15207 return (__m128)
15208 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
15209 (__v4sf) __D, __imm,
15210 (__v4sf) __A,
15211 (__mmask8) __B,
15212 _MM_FROUND_CUR_DIRECTION);
15213}
15214
15215extern __inline __m128
15216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15217_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
15218 const int __imm)
15219{
15220 return (__m128)
15221 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
15222 (__v4sf) __C, __imm,
15223 (__v4sf)
15224 _mm_setzero_ps (),
15225 (__mmask8) __A,
15226 _MM_FROUND_CUR_DIRECTION);
075691af
AI
15227}
15228
15229extern __inline __m128d
15230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15231_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
15232{
a7c4d6d1
HL
15233 return (__m128d)
15234 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
15235 (__v2df) __B, __imm,
15236 (__v2df)
15237 _mm_setzero_pd (),
15238 (__mmask8) -1,
15239 _MM_FROUND_CUR_DIRECTION);
15240}
15241
15242extern __inline __m128d
15243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15244_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
15245 const int __imm)
15246{
15247 return (__m128d)
15248 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
15249 (__v2df) __D, __imm,
15250 (__v2df) __A,
15251 (__mmask8) __B,
15252 _MM_FROUND_CUR_DIRECTION);
15253}
15254
15255extern __inline __m128d
15256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15257_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
15258 const int __imm)
15259{
15260 return (__m128d)
15261 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
15262 (__v2df) __C, __imm,
15263 (__v2df)
15264 _mm_setzero_pd (),
15265 (__mmask8) __A,
15266 _MM_FROUND_CUR_DIRECTION);
075691af
AI
15267}
15268
756c5857
AI
15269#else
/* Macro forms of the 512-bit single-precision roundscale intrinsics.
   Each expands to __builtin_ia32_rndscaleps_mask with
   _MM_FROUND_CUR_DIRECTION; they differ in the third operand (undefined,
   A or zero) and in the mask (-1, B or A).  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
                                            (int) (B), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16) (-1), \
                                            _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
                                            (int) (D), \
                                            (__v16sf) (__m512) (A), \
                                            (__mmask16) (B), \
                                            _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
                                            (int) (C), \
                                            (__v16sf) _mm512_setzero_ps (), \
                                            (__mmask16) (A), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit double-precision roundscale intrinsics.
   Each expands to __builtin_ia32_rndscalepd_mask with
   _MM_FROUND_CUR_DIRECTION; they differ in the third operand (undefined,
   A or zero) and in the mask (-1, B or A).  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
                                             (int) (B), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8) (-1), \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
                                             (int) (D), \
                                             (__v8df) (__m512d) (A), \
                                             (__mmask8) (B), \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
                                             (int) (C), \
                                             (__v8df) _mm512_setzero_pd (), \
                                             (__mmask8) (A), \
                                             _MM_FROUND_CUR_DIRECTION))
a7c4d6d1
HL
/* Macro forms of the scalar single-precision roundscale intrinsics.
   Each expands to __builtin_ia32_rndscaless_mask_round with immediate I
   and _MM_FROUND_CUR_DIRECTION; they differ in the fourth operand (zero
   or A) and in the mask (-1 or U).  */
#define _mm_roundscale_ss(A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I), \
                                                  (__v4sf) _mm_setzero_ps (), \
                                                  (__mmask8) (-1), \
                                                  _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_ss(A, U, B, C, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
                                                  (__v4sf) (__m128) (C), \
                                                  (int) (I), \
                                                  (__v4sf) (__m128) (A), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I), \
                                                  (__v4sf) _mm_setzero_ps (), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the scalar double-precision roundscale intrinsics.
   Each expands to __builtin_ia32_rndscalesd_mask_round with immediate I
   and _MM_FROUND_CUR_DIRECTION; they differ in the fourth operand (zero
   or A) and in the mask (-1 or U).  */
#define _mm_roundscale_sd(A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I), \
                                                   (__v2df) _mm_setzero_pd (), \
                                                   (__mmask8) (-1), \
                                                   _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_sd(A, U, B, C, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
                                                   (__v2df) (__m128d) (C), \
                                                   (int) (I), \
                                                   (__v2df) (__m128d) (A), \
                                                   (__mmask8) (U), \
                                                   _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I), \
                                                   (__v2df) _mm_setzero_pd (), \
                                                   (__mmask8) (U), \
                                                   _MM_FROUND_CUR_DIRECTION))
756c5857
AI
15344#endif
15345
15346#ifdef __OPTIMIZE__
15347extern __inline __mmask8
15348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15349_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
15350{
15351 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15352 (__v8df) __Y, __P,
15353 (__mmask8) -1,
15354 _MM_FROUND_CUR_DIRECTION);
15355}
15356
15357extern __inline __mmask16
15358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15359_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
15360{
15361 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15362 (__v16sf) __Y, __P,
15363 (__mmask16) -1,
15364 _MM_FROUND_CUR_DIRECTION);
15365}
15366
15367extern __inline __mmask16
15368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15369_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
15370{
15371 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15372 (__v16sf) __Y, __P,
15373 (__mmask16) __U,
15374 _MM_FROUND_CUR_DIRECTION);
15375}
15376
15377extern __inline __mmask8
15378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15379_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
15380{
15381 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15382 (__v8df) __Y, __P,
15383 (__mmask8) __U,
15384 _MM_FROUND_CUR_DIRECTION);
15385}
15386
12d69dbf
JJ
15387extern __inline __mmask8
15388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15389_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
15390{
15391 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15392 (__v2df) __Y, __P,
15393 (__mmask8) -1,
15394 _MM_FROUND_CUR_DIRECTION);
15395}
15396
15397extern __inline __mmask8
15398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15399_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
15400{
15401 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15402 (__v2df) __Y, __P,
15403 (__mmask8) __M,
15404 _MM_FROUND_CUR_DIRECTION);
15405}
15406
15407extern __inline __mmask8
15408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15409_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
15410{
15411 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15412 (__v4sf) __Y, __P,
15413 (__mmask8) -1,
15414 _MM_FROUND_CUR_DIRECTION);
15415}
15416
15417extern __inline __mmask8
15418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15419_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
15420{
15421 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15422 (__v4sf) __Y, __P,
15423 (__mmask8) __M,
15424 _MM_FROUND_CUR_DIRECTION);
15425}
15426
15427#else
/* Macro forms of the 512-bit generic compare intrinsics.  Each expands
   to __builtin_ia32_cmppd512_mask or __builtin_ia32_cmpps512_mask with
   predicate P, a mask of -1 (unmasked forms) or M (masked forms), and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) (M), \
                                             _MM_FROUND_CUR_DIRECTION))
15447
/* Macro form: expands to __builtin_ia32_cmpsd_mask on X and Y with
   predicate P, an all-ones mask and _MM_FROUND_CUR_DIRECTION.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
15452
/* Macro form: expands to __builtin_ia32_cmpsd_mask on X and Y with
   predicate P, M as the mask and _MM_FROUND_CUR_DIRECTION.  M is
   parenthesized and cast to __mmask8, like the mask argument of the
   other masked compare macros, so the operand type is explicit.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15457
/* Macro form: expands to __builtin_ia32_cmpss_mask on X and Y with
   predicate P, an all-ones mask and _MM_FROUND_CUR_DIRECTION.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
15462
/* Macro form: expands to __builtin_ia32_cmpss_mask on X and Y with
   predicate P, M as the mask and _MM_FROUND_CUR_DIRECTION.  M is
   parenthesized and cast to __mmask8, like the mask argument of the
   other masked compare macros, so the operand type is explicit.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15467#endif
15468
7e23f4a6
OM
15469extern __inline __mmask8
15470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15471_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
15472{
15473 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15474 (__v8df) __Y, _CMP_EQ_OQ,
15475 (__mmask8) -1,
15476 _MM_FROUND_CUR_DIRECTION);
15477}
15478
15479extern __inline __mmask8
15480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15481_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15482{
15483 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15484 (__v8df) __Y, _CMP_EQ_OQ,
15485 (__mmask8) __U,
15486 _MM_FROUND_CUR_DIRECTION);
15487}
15488
15489extern __inline __mmask8
15490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15491_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
15492{
15493 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15494 (__v8df) __Y, _CMP_LT_OS,
15495 (__mmask8) -1,
15496 _MM_FROUND_CUR_DIRECTION);
15497}
15498
15499extern __inline __mmask8
15500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15501_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15502{
15503 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15504 (__v8df) __Y, _CMP_LT_OS,
15505 (__mmask8) __U,
15506 _MM_FROUND_CUR_DIRECTION);
15507}
15508
15509extern __inline __mmask8
15510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15511_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
15512{
15513 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15514 (__v8df) __Y, _CMP_LE_OS,
15515 (__mmask8) -1,
15516 _MM_FROUND_CUR_DIRECTION);
15517}
15518
15519extern __inline __mmask8
15520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15521_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15522{
15523 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15524 (__v8df) __Y, _CMP_LE_OS,
15525 (__mmask8) __U,
15526 _MM_FROUND_CUR_DIRECTION);
15527}
15528
15529extern __inline __mmask8
15530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15531_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
15532{
15533 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15534 (__v8df) __Y, _CMP_UNORD_Q,
15535 (__mmask8) -1,
15536 _MM_FROUND_CUR_DIRECTION);
15537}
15538
15539extern __inline __mmask8
15540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15541_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15542{
15543 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15544 (__v8df) __Y, _CMP_UNORD_Q,
15545 (__mmask8) __U,
15546 _MM_FROUND_CUR_DIRECTION);
15547}
15548
15549extern __inline __mmask8
15550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15551_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
15552{
15553 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15554 (__v8df) __Y, _CMP_NEQ_UQ,
15555 (__mmask8) -1,
15556 _MM_FROUND_CUR_DIRECTION);
15557}
15558
15559extern __inline __mmask8
15560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15561_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15562{
15563 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15564 (__v8df) __Y, _CMP_NEQ_UQ,
15565 (__mmask8) __U,
15566 _MM_FROUND_CUR_DIRECTION);
15567}
15568
15569extern __inline __mmask8
15570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15571_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
15572{
15573 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15574 (__v8df) __Y, _CMP_NLT_US,
15575 (__mmask8) -1,
15576 _MM_FROUND_CUR_DIRECTION);
15577}
15578
15579extern __inline __mmask8
15580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15581_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15582{
15583 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15584 (__v8df) __Y, _CMP_NLT_US,
15585 (__mmask8) __U,
15586 _MM_FROUND_CUR_DIRECTION);
15587}
15588
15589extern __inline __mmask8
15590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15591_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15592{
15593 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15594 (__v8df) __Y, _CMP_NLE_US,
15595 (__mmask8) -1,
15596 _MM_FROUND_CUR_DIRECTION);
15597}
15598
15599extern __inline __mmask8
15600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15601_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15602{
15603 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15604 (__v8df) __Y, _CMP_NLE_US,
15605 (__mmask8) __U,
15606 _MM_FROUND_CUR_DIRECTION);
15607}
15608
15609extern __inline __mmask8
15610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15611_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15612{
15613 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15614 (__v8df) __Y, _CMP_ORD_Q,
15615 (__mmask8) -1,
15616 _MM_FROUND_CUR_DIRECTION);
15617}
15618
15619extern __inline __mmask8
15620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15621_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15622{
15623 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15624 (__v8df) __Y, _CMP_ORD_Q,
15625 (__mmask8) __U,
15626 _MM_FROUND_CUR_DIRECTION);
15627}
15628
15629extern __inline __mmask16
15630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15631_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15632{
15633 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15634 (__v16sf) __Y, _CMP_EQ_OQ,
15635 (__mmask16) -1,
15636 _MM_FROUND_CUR_DIRECTION);
15637}
15638
15639extern __inline __mmask16
15640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15641_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15642{
15643 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15644 (__v16sf) __Y, _CMP_EQ_OQ,
15645 (__mmask16) __U,
15646 _MM_FROUND_CUR_DIRECTION);
15647}
15648
15649extern __inline __mmask16
15650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15651_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15652{
15653 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15654 (__v16sf) __Y, _CMP_LT_OS,
15655 (__mmask16) -1,
15656 _MM_FROUND_CUR_DIRECTION);
15657}
15658
15659extern __inline __mmask16
15660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15661_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15662{
15663 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15664 (__v16sf) __Y, _CMP_LT_OS,
15665 (__mmask16) __U,
15666 _MM_FROUND_CUR_DIRECTION);
15667}
15668
15669extern __inline __mmask16
15670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15671_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15672{
15673 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15674 (__v16sf) __Y, _CMP_LE_OS,
15675 (__mmask16) -1,
15676 _MM_FROUND_CUR_DIRECTION);
15677}
15678
15679extern __inline __mmask16
15680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15681_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15682{
15683 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15684 (__v16sf) __Y, _CMP_LE_OS,
15685 (__mmask16) __U,
15686 _MM_FROUND_CUR_DIRECTION);
15687}
15688
15689extern __inline __mmask16
15690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15691_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15692{
15693 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15694 (__v16sf) __Y, _CMP_UNORD_Q,
15695 (__mmask16) -1,
15696 _MM_FROUND_CUR_DIRECTION);
15697}
15698
15699extern __inline __mmask16
15700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15701_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15702{
15703 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15704 (__v16sf) __Y, _CMP_UNORD_Q,
15705 (__mmask16) __U,
15706 _MM_FROUND_CUR_DIRECTION);
15707}
15708
15709extern __inline __mmask16
15710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15711_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15712{
15713 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15714 (__v16sf) __Y, _CMP_NEQ_UQ,
15715 (__mmask16) -1,
15716 _MM_FROUND_CUR_DIRECTION);
15717}
15718
15719extern __inline __mmask16
15720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15721_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15722{
15723 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15724 (__v16sf) __Y, _CMP_NEQ_UQ,
15725 (__mmask16) __U,
15726 _MM_FROUND_CUR_DIRECTION);
15727}
15728
15729extern __inline __mmask16
15730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15731_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15732{
15733 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15734 (__v16sf) __Y, _CMP_NLT_US,
15735 (__mmask16) -1,
15736 _MM_FROUND_CUR_DIRECTION);
15737}
15738
15739extern __inline __mmask16
15740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15741_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15742{
15743 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15744 (__v16sf) __Y, _CMP_NLT_US,
15745 (__mmask16) __U,
15746 _MM_FROUND_CUR_DIRECTION);
15747}
15748
15749extern __inline __mmask16
15750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15751_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15752{
15753 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15754 (__v16sf) __Y, _CMP_NLE_US,
15755 (__mmask16) -1,
15756 _MM_FROUND_CUR_DIRECTION);
15757}
15758
15759extern __inline __mmask16
15760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15761_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15762{
15763 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15764 (__v16sf) __Y, _CMP_NLE_US,
15765 (__mmask16) __U,
15766 _MM_FROUND_CUR_DIRECTION);
15767}
15768
15769extern __inline __mmask16
15770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15771_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15772{
15773 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15774 (__v16sf) __Y, _CMP_ORD_Q,
15775 (__mmask16) -1,
15776 _MM_FROUND_CUR_DIRECTION);
15777}
15778
15779extern __inline __mmask16
15780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15781_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15782{
15783 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15784 (__v16sf) __Y, _CMP_ORD_Q,
15785 (__mmask16) __U,
15786 _MM_FROUND_CUR_DIRECTION);
15787}
15788
2196a885
KY
15789extern __inline __mmask16
15790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15791_mm512_kmov (__mmask16 __A)
15792{
7cdb6e4c 15793 return __builtin_ia32_kmovw (__A);
2196a885
KY
15794}
15795
275be1da
IT
15796extern __inline __m512
15797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15798_mm512_castpd_ps (__m512d __A)
15799{
15800 return (__m512) (__A);
15801}
15802
15803extern __inline __m512i
15804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15805_mm512_castpd_si512 (__m512d __A)
15806{
15807 return (__m512i) (__A);
15808}
15809
15810extern __inline __m512d
15811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15812_mm512_castps_pd (__m512 __A)
15813{
15814 return (__m512d) (__A);
15815}
15816
15817extern __inline __m512i
15818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15819_mm512_castps_si512 (__m512 __A)
15820{
15821 return (__m512i) (__A);
15822}
15823
15824extern __inline __m512
15825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15826_mm512_castsi512_ps (__m512i __A)
15827{
15828 return (__m512) (__A);
15829}
15830
15831extern __inline __m512d
15832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15833_mm512_castsi512_pd (__m512i __A)
15834{
15835 return (__m512d) (__A);
15836}
15837
15838extern __inline __m128d
15839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15840_mm512_castpd512_pd128 (__m512d __A)
15841{
15842 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15843}
15844
15845extern __inline __m128
15846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15847_mm512_castps512_ps128 (__m512 __A)
15848{
15849 return _mm512_extractf32x4_ps(__A, 0);
15850}
15851
15852extern __inline __m128i
15853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15854_mm512_castsi512_si128 (__m512i __A)
15855{
15856 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15857}
15858
15859extern __inline __m256d
15860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15861_mm512_castpd512_pd256 (__m512d __A)
15862{
15863 return _mm512_extractf64x4_pd(__A, 0);
15864}
15865
15866extern __inline __m256
15867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15868_mm512_castps512_ps256 (__m512 __A)
15869{
15870 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15871}
15872
15873extern __inline __m256i
15874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15875_mm512_castsi512_si256 (__m512i __A)
15876{
15877 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15878}
15879
15880extern __inline __m512d
15881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15882_mm512_castpd128_pd512 (__m128d __A)
15883{
15884 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
15885}
15886
15887extern __inline __m512
15888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15889_mm512_castps128_ps512 (__m128 __A)
15890{
15891 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
15892}
15893
15894extern __inline __m512i
15895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15896_mm512_castsi128_si512 (__m128i __A)
15897{
15898 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
15899}
15900
15901extern __inline __m512d
15902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15903_mm512_castpd256_pd512 (__m256d __A)
15904{
15905 return __builtin_ia32_pd512_256pd (__A);
15906}
15907
15908extern __inline __m512
15909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15910_mm512_castps256_ps512 (__m256 __A)
15911{
15912 return __builtin_ia32_ps512_256ps (__A);
15913}
15914
15915extern __inline __m512i
15916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15917_mm512_castsi256_si512 (__m256i __A)
15918{
15919 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
15920}
15921
e6b2dc24
JJ
15922extern __inline __m512d
15923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15924_mm512_zextpd128_pd512 (__m128d __A)
15925{
15926 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
15927}
15928
15929extern __inline __m512
15930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15931_mm512_zextps128_ps512 (__m128 __A)
15932{
15933 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
15934}
15935
15936extern __inline __m512i
15937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15938_mm512_zextsi128_si512 (__m128i __A)
15939{
15940 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
15941}
15942
15943extern __inline __m512d
15944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15945_mm512_zextpd256_pd512 (__m256d __A)
15946{
15947 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
15948}
15949
15950extern __inline __m512
15951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15952_mm512_zextps256_ps512 (__m256 __A)
15953{
15954 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
15955}
15956
15957extern __inline __m512i
15958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15959_mm512_zextsi256_si512 (__m256i __A)
15960{
15961 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
15962}
15963
275be1da
IT
15964extern __inline __mmask16
15965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15966_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
15967{
15968 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15969 (__v16si) __B, 0,
15970 (__mmask16) -1);
15971}
15972
15973extern __inline __mmask16
15974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15975_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15976{
15977 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15978 (__v16si) __B, 0, __U);
15979}
15980
15981extern __inline __mmask8
15982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15983_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15984{
15985 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15986 (__v8di) __B, 0, __U);
15987}
15988
15989extern __inline __mmask8
15990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15991_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
15992{
15993 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15994 (__v8di) __B, 0,
15995 (__mmask8) -1);
15996}
15997
15998extern __inline __mmask16
15999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16000_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
16001{
16002 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16003 (__v16si) __B, 6,
16004 (__mmask16) -1);
16005}
16006
16007extern __inline __mmask16
16008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16009_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
16010{
16011 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16012 (__v16si) __B, 6, __U);
16013}
16014
16015extern __inline __mmask8
16016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16017_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
16018{
16019 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16020 (__v8di) __B, 6, __U);
16021}
16022
16023extern __inline __mmask8
16024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16025_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
16026{
16027 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16028 (__v8di) __B, 6,
16029 (__mmask8) -1);
16030}
16031
167a5b77
JJ
/* Function body shared by the 32-bit integer add/mul/and/or reductions.
   Expects a __m512i named __A in scope and expands to declarations
   followed by an unterminated `return', so the user supplies the `;'.

   The sixteen lanes are folded 512 -> 256 -> 128 bits, then the two
   64-bit halves are combined via a shuffle, and finally lanes 0 and 1.

   The arithmetic is carried out on unsigned vectors (__v8su/__v4su,
   from the AVX/SSE2 headers that immintrin.h includes earlier) so that
   `+' and `*' wrap around instead of being signed overflow, which is
   undefined.  The bit pattern of the result is unchanged; the unsigned
   value converts back to int in the caller's return.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8su __T1 = (__v8su) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8su __T2 = (__v8su) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v4su __T4 = (__v4su) _mm256_extracti128_si256 (__T3, 1); \
  __v4su __T5 = (__v4su) _mm256_extracti128_si256 (__T3, 0); \
  __v4su __T6 = __T4 op __T5; \
  __v4su __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __v4su __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]
16043
16044extern __inline int
16045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16046_mm512_reduce_add_epi32 (__m512i __A)
16047{
16048 __MM512_REDUCE_OP (+);
16049}
16050
16051extern __inline int
16052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16053_mm512_reduce_mul_epi32 (__m512i __A)
16054{
16055 __MM512_REDUCE_OP (*);
16056}
16057
16058extern __inline int
16059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16060_mm512_reduce_and_epi32 (__m512i __A)
16061{
16062 __MM512_REDUCE_OP (&);
16063}
16064
16065extern __inline int
16066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16067_mm512_reduce_or_epi32 (__m512i __A)
16068{
16069 __MM512_REDUCE_OP (|);
16070}
16071
16072extern __inline int
16073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16074_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
16075{
16076 __A = _mm512_maskz_mov_epi32 (__U, __A);
16077 __MM512_REDUCE_OP (+);
16078}
16079
16080extern __inline int
16081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16082_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
16083{
16084 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
16085 __MM512_REDUCE_OP (*);
16086}
16087
16088extern __inline int
16089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16090_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
16091{
16092 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16093 __MM512_REDUCE_OP (&);
16094}
16095
16096extern __inline int
16097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16098_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
16099{
16100 __A = _mm512_maskz_mov_epi32 (__U, __A);
16101 __MM512_REDUCE_OP (|);
16102}
16103
/* Function body shared by the 32-bit integer min/max reductions.  OP
   is the suffix of a pair of _mm256_/_mm_ intrinsics (for example
   min_epi32).  Expects a __m512i named __A in scope and expands to
   declarations followed by an unterminated `return'.

   The lanes are folded 512 -> 256 -> 128 bits, then against a copy
   with the 64-bit halves swapped, then against a copy with adjacent
   lanes swapped; lane 0 of the last result is returned.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __Hi256 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __Lo256 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __Red256 = _mm256_##op (__Hi256, __Lo256); \
  __m128i __Hi128 = (__m128i) _mm256_extracti128_si256 (__Red256, 1); \
  __m128i __Lo128 = (__m128i) _mm256_extracti128_si256 (__Red256, 0); \
  __m128i __Red128 = _mm_##op (__Hi128, __Lo128); \
  __m128i __Swap64 \
    = (__m128i) __builtin_shuffle ((__v4si) __Red128, \
                                   (__v4si) { 2, 3, 0, 1 }); \
  __m128i __Red64 = _mm_##op (__Red128, __Swap64); \
  __m128i __Swap32 \
    = (__m128i) __builtin_shuffle ((__v4si) __Red64, \
                                   (__v4si) { 1, 0, 1, 0 }); \
  __v4si __Red32 = (__v4si) _mm_##op (__Red64, __Swap32); \
  return __Red32[0]
16119
16120extern __inline int
16121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16122_mm512_reduce_min_epi32 (__m512i __A)
16123{
16124 __MM512_REDUCE_OP (min_epi32);
16125}
16126
16127extern __inline int
16128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16129_mm512_reduce_max_epi32 (__m512i __A)
16130{
16131 __MM512_REDUCE_OP (max_epi32);
16132}
16133
16134extern __inline unsigned int
16135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16136_mm512_reduce_min_epu32 (__m512i __A)
16137{
16138 __MM512_REDUCE_OP (min_epu32);
16139}
16140
16141extern __inline unsigned int
16142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16143_mm512_reduce_max_epu32 (__m512i __A)
16144{
16145 __MM512_REDUCE_OP (max_epu32);
16146}
16147
16148extern __inline int
16149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16150_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
16151{
16152 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
16153 __MM512_REDUCE_OP (min_epi32);
16154}
16155
16156extern __inline int
16157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16158_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
16159{
16160 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
16161 __MM512_REDUCE_OP (max_epi32);
16162}
16163
16164extern __inline unsigned int
16165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16166_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
16167{
16168 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16169 __MM512_REDUCE_OP (min_epu32);
16170}
16171
16172extern __inline unsigned int
16173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16174_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
16175{
16176 __A = _mm512_maskz_mov_epi32 (__U, __A);
16177 __MM512_REDUCE_OP (max_epu32);
16178}
16179
/* Function body shared by the single-precision add/mul reductions.
   OP is a binary operator token.  Expects a __m512 named __A in scope
   and expands to declarations followed by an unterminated `return'.

   The lanes are folded 512 -> 256 -> 128 bits, then the two 64-bit
   halves are combined via a shuffle, and finally lanes 0 and 1.  The
   operand order of every step is significant for floating point and
   must not be changed.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __Hi256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __Lo256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __Red256 = __Hi256 op __Lo256; \
  __m128 __Hi128 = _mm256_extractf128_ps (__Red256, 1); \
  __m128 __Lo128 = _mm256_extractf128_ps (__Red256, 0); \
  __m128 __Red128 = __Hi128 op __Lo128; \
  __m128 __Swap64 \
    = __builtin_shuffle (__Red128, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __Red64 = __Red128 op __Swap64; \
  return __Red64[0] op __Red64[1]
16191
16192extern __inline float
16193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16194_mm512_reduce_add_ps (__m512 __A)
16195{
16196 __MM512_REDUCE_OP (+);
16197}
16198
16199extern __inline float
16200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16201_mm512_reduce_mul_ps (__m512 __A)
16202{
16203 __MM512_REDUCE_OP (*);
16204}
16205
16206extern __inline float
16207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16208_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
16209{
16210 __A = _mm512_maskz_mov_ps (__U, __A);
16211 __MM512_REDUCE_OP (+);
16212}
16213
16214extern __inline float
16215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16216_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
16217{
16218 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
16219 __MM512_REDUCE_OP (*);
16220}
16221
/* Function body shared by the single-precision min/max reductions.  OP
   is the suffix of a pair of _mm256_/_mm_ intrinsics (min_ps or
   max_ps).  Expects a __m512 named __A in scope and expands to
   declarations followed by an unterminated `return'.

   The lanes are folded 512 -> 256 -> 128 bits, then against a copy
   with the 64-bit halves swapped, then against a copy with adjacent
   lanes swapped; lane 0 of the last result is returned.  The argument
   order of each call is kept exactly as written.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __Hi256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __Lo256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __Red256 = _mm256_##op (__Hi256, __Lo256); \
  __m128 __Hi128 = _mm256_extractf128_ps (__Red256, 1); \
  __m128 __Lo128 = _mm256_extractf128_ps (__Red256, 0); \
  __m128 __Red128 = _mm_##op (__Hi128, __Lo128); \
  __m128 __Swap64 \
    = __builtin_shuffle (__Red128, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __Red64 = _mm_##op (__Red128, __Swap64); \
  __m128 __Swap32 \
    = __builtin_shuffle (__Red64, (__v4si) { 1, 0, 1, 0 }); \
  __m128 __Red32 = _mm_##op (__Red64, __Swap32); \
  return __Red32[0]
16235
16236extern __inline float
16237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16238_mm512_reduce_min_ps (__m512 __A)
16239{
16240 __MM512_REDUCE_OP (min_ps);
16241}
16242
16243extern __inline float
16244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16245_mm512_reduce_max_ps (__m512 __A)
16246{
16247 __MM512_REDUCE_OP (max_ps);
16248}
16249
16250extern __inline float
16251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16252_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
16253{
16254 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
16255 __MM512_REDUCE_OP (min_ps);
16256}
16257
16258extern __inline float
16259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16260_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
16261{
16262 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
16263 __MM512_REDUCE_OP (max_ps);
16264}
16265
/* Function body shared by the 64-bit integer add/mul/and/or
   reductions.  Expects a __m512i named __A in scope and expands to
   declarations followed by an unterminated `return', so the user
   supplies the `;'.

   The eight lanes are folded 512 -> 256 -> 128 bits and the last two
   lanes are then combined as scalars.

   The arithmetic is carried out on unsigned vectors (__v4du/__v2du,
   from the AVX/SSE2 headers that immintrin.h includes earlier) so that
   `+' and `*' wrap around instead of being signed overflow, which is
   undefined.  The bit pattern of the result is unchanged; the unsigned
   value converts back to long long in the caller's return.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4du __T1 = (__v4du) _mm512_extracti64x4_epi64 (__A, 1); \
  __v4du __T2 = (__v4du) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v2du __T4 = (__v2du) _mm256_extracti128_si256 (__T3, 1); \
  __v2du __T5 = (__v2du) _mm256_extracti128_si256 (__T3, 0); \
  __v2du __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]
16275
16276extern __inline long long
16277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16278_mm512_reduce_add_epi64 (__m512i __A)
16279{
16280 __MM512_REDUCE_OP (+);
16281}
16282
16283extern __inline long long
16284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16285_mm512_reduce_mul_epi64 (__m512i __A)
16286{
16287 __MM512_REDUCE_OP (*);
16288}
16289
16290extern __inline long long
16291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16292_mm512_reduce_and_epi64 (__m512i __A)
16293{
16294 __MM512_REDUCE_OP (&);
16295}
16296
16297extern __inline long long
16298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16299_mm512_reduce_or_epi64 (__m512i __A)
16300{
16301 __MM512_REDUCE_OP (|);
16302}
16303
16304extern __inline long long
16305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16306_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
16307{
16308 __A = _mm512_maskz_mov_epi64 (__U, __A);
16309 __MM512_REDUCE_OP (+);
16310}
16311
16312extern __inline long long
16313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16314_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
16315{
16316 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
16317 __MM512_REDUCE_OP (*);
16318}
16319
16320extern __inline long long
16321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16322_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
16323{
16324 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16325 __MM512_REDUCE_OP (&);
16326}
16327
16328extern __inline long long
16329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16330_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
16331{
16332 __A = _mm512_maskz_mov_epi64 (__U, __A);
16333 __MM512_REDUCE_OP (|);
16334}
16335
/* Function body shared by the 64-bit integer min/max reductions.  OP
   is the suffix of an _mm512_ intrinsic (for example min_epi64).
   Expects a __m512i named __A in scope and expands to declarations
   followed by an unterminated `return'.

   All three steps work on full 512-bit vectors: __A is combined with
   _mm512_shuffle_i64x2 (__A, __A, 0x4e) (presumably the 256-bit halves
   swapped -- confirm against the intrinsic), then with a copy whose
   lane pairs are swapped, then with a copy whose adjacent lanes are
   swapped; lane 0 of the last result is returned.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __Swap256 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
  __m512i __Red256 = _mm512_##op (__A, __Swap256); \
  __m512i __Swap128 \
    = (__m512i) __builtin_shuffle ((__v8di) __Red256, \
                                   (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m512i __Red128 = _mm512_##op (__Red256, __Swap128); \
  __m512i __Swap64 \
    = (__m512i) __builtin_shuffle ((__v8di) __Red128, \
                                   (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 }); \
  __v8di __Red64 = (__v8di) _mm512_##op (__Red128, __Swap64); \
  return __Red64[0]
16349
16350extern __inline long long
16351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16352_mm512_reduce_min_epi64 (__m512i __A)
16353{
16354 __MM512_REDUCE_OP (min_epi64);
16355}
16356
16357extern __inline long long
16358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16359_mm512_reduce_max_epi64 (__m512i __A)
16360{
16361 __MM512_REDUCE_OP (max_epi64);
16362}
16363
16364extern __inline long long
16365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16366_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
16367{
16368 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
16369 __U, __A);
16370 __MM512_REDUCE_OP (min_epi64);
16371}
16372
16373extern __inline long long
16374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16375_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
16376{
16377 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
16378 __U, __A);
16379 __MM512_REDUCE_OP (max_epi64);
16380}
16381
16382extern __inline unsigned long long
16383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16384_mm512_reduce_min_epu64 (__m512i __A)
16385{
16386 __MM512_REDUCE_OP (min_epu64);
16387}
16388
16389extern __inline unsigned long long
16390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16391_mm512_reduce_max_epu64 (__m512i __A)
16392{
16393 __MM512_REDUCE_OP (max_epu64);
16394}
16395
16396extern __inline unsigned long long
16397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16398_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
16399{
16400 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16401 __MM512_REDUCE_OP (min_epu64);
16402}
16403
16404extern __inline unsigned long long
16405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16406_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
16407{
16408 __A = _mm512_maskz_mov_epi64 (__U, __A);
16409 __MM512_REDUCE_OP (max_epu64);
16410}
16411
/* Function body shared by the double-precision add/mul reductions.  OP
   is a binary operator token.  Expects a __m512d named __A in scope
   and expands to declarations followed by an unterminated `return'.

   The eight lanes are folded 512 -> 256 -> 128 bits and the last two
   lanes are combined as scalars.  The operand order of every step is
   significant for floating point and must not be changed.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __Hi256 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __Lo256 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __Red256 = __Hi256 op __Lo256; \
  __m128d __Hi128 = _mm256_extractf128_pd (__Red256, 1); \
  __m128d __Lo128 = _mm256_extractf128_pd (__Red256, 0); \
  __m128d __Red128 = __Hi128 op __Lo128; \
  return __Red128[0] op __Red128[1]
16421
16422extern __inline double
16423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16424_mm512_reduce_add_pd (__m512d __A)
16425{
16426 __MM512_REDUCE_OP (+);
16427}
16428
16429extern __inline double
16430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16431_mm512_reduce_mul_pd (__m512d __A)
16432{
16433 __MM512_REDUCE_OP (*);
16434}
16435
16436extern __inline double
16437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16438_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
16439{
16440 __A = _mm512_maskz_mov_pd (__U, __A);
16441 __MM512_REDUCE_OP (+);
16442}
16443
16444extern __inline double
16445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16446_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
16447{
16448 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
16449 __MM512_REDUCE_OP (*);
16450}
16451
/* Function body shared by the double-precision min/max reductions.  OP
   is the suffix of a pair of _mm256_/_mm_ intrinsics (min_pd or
   max_pd).  Expects a __m512d named __A in scope and expands to
   declarations followed by an unterminated `return'.

   The lanes are folded 512 -> 256 -> 128 bits, then against a copy
   with the two lanes swapped; lane 0 of the last result is returned.
   The argument order of each call is kept exactly as written.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __Hi256 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __Lo256 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __Red256 = _mm256_##op (__Hi256, __Lo256); \
  __m128d __Hi128 = _mm256_extractf128_pd (__Red256, 1); \
  __m128d __Lo128 = _mm256_extractf128_pd (__Red256, 0); \
  __m128d __Red128 = _mm_##op (__Hi128, __Lo128); \
  __m128d __Swap64 \
    = (__m128d) __builtin_shuffle (__Red128, (__v2di) { 1, 0 }); \
  __m128d __Red64 = _mm_##op (__Red128, __Swap64); \
  return __Red64[0]
16463
16464extern __inline double
16465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16466_mm512_reduce_min_pd (__m512d __A)
16467{
16468 __MM512_REDUCE_OP (min_pd);
16469}
16470
16471extern __inline double
16472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16473_mm512_reduce_max_pd (__m512d __A)
16474{
16475 __MM512_REDUCE_OP (max_pd);
16476}
16477
16478extern __inline double
16479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16480_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
16481{
16482 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
16483 __MM512_REDUCE_OP (min_pd);
16484}
16485
16486extern __inline double
16487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16488_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
16489{
16490 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
16491 __MM512_REDUCE_OP (max_pd);
16492}
16493
16494#undef __MM512_REDUCE_OP
16495
756c5857
AI
16496#ifdef __DISABLE_AVX512F__
16497#undef __DISABLE_AVX512F__
16498#pragma GCC pop_options
16499#endif /* __DISABLE_AVX512F__ */
16500
16501#endif /* _AVX512FINTRIN_H_INCLUDED */