]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
i386.md (ffs<mode>2): Generate CCCmode flags register for TARGET_BMI.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
818ab71a 1/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal 64-byte vector types.  The intrinsics below cast their public
   arguments to one of these before doing lane arithmetic or calling a
   builtin.  */

/* Floating-point element types.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* Integer element types, each in a signed and an unsigned flavour.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 64-byte vector types.  They carry __may_alias__ because the Intel
   API is flexible enough that aliasing with other vector types, and with
   their scalar components, must be allowed.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Counterparts of the public vector types that additionally declare an
   alignment of 1 (__aligned__ (1)), i.e. the unaligned versions.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Mask types used by the masked intrinsics: an unsigned char and an
   unsigned short respectively.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
756c5857
AI
63extern __inline __m512i
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_set_epi64 (long long __A, long long __B, long long __C,
66 long long __D, long long __E, long long __F,
67 long long __G, long long __H)
68{
69 return __extension__ (__m512i) (__v8di)
70 { __H, __G, __F, __E, __D, __C, __B, __A };
71}
72
73/* Create the vector [A B C D E F G H I J K L M N O P]. */
74extern __inline __m512i
75__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76_mm512_set_epi32 (int __A, int __B, int __C, int __D,
77 int __E, int __F, int __G, int __H,
78 int __I, int __J, int __K, int __L,
79 int __M, int __N, int __O, int __P)
80{
81 return __extension__ (__m512i)(__v16si)
82 { __P, __O, __N, __M, __L, __K, __J, __I,
83 __H, __G, __F, __E, __D, __C, __B, __A };
84}
85
86extern __inline __m512d
87__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88_mm512_set_pd (double __A, double __B, double __C, double __D,
89 double __E, double __F, double __G, double __H)
90{
91 return __extension__ (__m512d)
92 { __H, __G, __F, __E, __D, __C, __B, __A };
93}
94
95extern __inline __m512
96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97_mm512_set_ps (float __A, float __B, float __C, float __D,
98 float __E, float __F, float __G, float __H,
99 float __I, float __J, float __K, float __L,
100 float __M, float __N, float __O, float __P)
101{
102 return __extension__ (__m512)
103 { __P, __O, __N, __M, __L, __K, __J, __I,
104 __H, __G, __F, __E, __D, __C, __B, __A };
105}
106
/* Reversed-argument forms of the _mm512_set_* functions: each macro hands
   its arguments to the matching function in the opposite order, so e0 is
   passed last.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8,		      \
		    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)				      \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8,			      \
		 e7, e6, e5, e4, e3, e2, e1, e0)
119
0b192937
UD
120extern __inline __m512
121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
122_mm512_undefined_ps (void)
123{
124 __m512 __Y = __Y;
125 return __Y;
126}
127
128extern __inline __m512d
129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
130_mm512_undefined_pd (void)
131{
132 __m512d __Y = __Y;
133 return __Y;
134}
135
136extern __inline __m512i
137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 138_mm512_undefined_epi32 (void)
0b192937
UD
139{
140 __m512i __Y = __Y;
141 return __Y;
142}
143
4271e5cb
UB
/* _mm512_undefined_si512 is a plain alias for _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
145
7d9088c2
UD
146extern __inline __m512i
147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
148_mm512_set1_epi8 (char __A)
149{
150 return __extension__ (__m512i)(__v64qi)
151 { __A, __A, __A, __A, __A, __A, __A, __A,
152 __A, __A, __A, __A, __A, __A, __A, __A,
153 __A, __A, __A, __A, __A, __A, __A, __A,
154 __A, __A, __A, __A, __A, __A, __A, __A,
155 __A, __A, __A, __A, __A, __A, __A, __A,
156 __A, __A, __A, __A, __A, __A, __A, __A,
157 __A, __A, __A, __A, __A, __A, __A, __A,
158 __A, __A, __A, __A, __A, __A, __A, __A };
159}
160
161extern __inline __m512i
162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163_mm512_set1_epi16 (short __A)
164{
165 return __extension__ (__m512i)(__v32hi)
166 { __A, __A, __A, __A, __A, __A, __A, __A,
167 __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A };
170}
171
2b2384e8
UD
172extern __inline __m512d
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm512_set1_pd (double __A)
175{
176 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
177 (__v2df) { __A, },
178 (__v8df)
179 _mm512_undefined_pd (),
180 (__mmask8) -1);
181}
182
183extern __inline __m512
184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185_mm512_set1_ps (float __A)
186{
187 return (__m512) __builtin_ia32_broadcastss512 (__extension__
188 (__v4sf) { __A, },
189 (__v16sf)
190 _mm512_undefined_ps (),
191 (__mmask16) -1);
192}
193
7d9088c2
UD
194/* Create the vector [A B C D A B C D A B C D A B C D]. */
195extern __inline __m512i
196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
197_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
198{
199 return __extension__ (__m512i)(__v16si)
200 { __D, __C, __B, __A, __D, __C, __B, __A,
201 __D, __C, __B, __A, __D, __C, __B, __A };
202}
203
204extern __inline __m512i
205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
206_mm512_set4_epi64 (long long __A, long long __B, long long __C,
207 long long __D)
208{
209 return __extension__ (__m512i) (__v8di)
210 { __D, __C, __B, __A, __D, __C, __B, __A };
211}
212
213extern __inline __m512d
214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
215_mm512_set4_pd (double __A, double __B, double __C, double __D)
216{
217 return __extension__ (__m512d)
218 { __D, __C, __B, __A, __D, __C, __B, __A };
219}
220
221extern __inline __m512
222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223_mm512_set4_ps (float __A, float __B, float __C, float __D)
224{
225 return __extension__ (__m512)
226 { __D, __C, __B, __A, __D, __C, __B, __A,
227 __D, __C, __B, __A, __D, __C, __B, __A };
228}
229
/* Reversed-argument forms of the _mm512_set4_* functions: each macro passes
   its four arguments to the matching function in the opposite order.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)					      \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3)					      \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0,e1,e2,e3)					      \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0,e1,e2,e3)					      \
  _mm512_set4_ps (e3, e2, e1, e0)
241
756c5857
AI
242extern __inline __m512
243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
244_mm512_setzero_ps (void)
245{
246 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
247 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
248}
249
250extern __inline __m512d
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252_mm512_setzero_pd (void)
253{
254 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
255}
256
7d9088c2
UD
257extern __inline __m512i
258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259_mm512_setzero_epi32 (void)
260{
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
262}
263
756c5857
AI
264extern __inline __m512i
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm512_setzero_si512 (void)
267{
268 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
269}
270
271extern __inline __m512d
272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
273_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
274{
275 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
276 (__v8df) __W,
277 (__mmask8) __U);
278}
279
280extern __inline __m512d
281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
283{
284 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
285 (__v8df)
286 _mm512_setzero_pd (),
287 (__mmask8) __U);
288}
289
290extern __inline __m512
291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
292_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
293{
294 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
295 (__v16sf) __W,
296 (__mmask16) __U);
297}
298
299extern __inline __m512
300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
302{
303 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
304 (__v16sf)
305 _mm512_setzero_ps (),
306 (__mmask16) __U);
307}
308
309extern __inline __m512d
310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311_mm512_load_pd (void const *__P)
312{
313 return *(__m512d *) __P;
314}
315
316extern __inline __m512d
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
319{
320 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
321 (__v8df) __W,
322 (__mmask8) __U);
323}
324
325extern __inline __m512d
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
328{
329 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
330 (__v8df)
331 _mm512_setzero_pd (),
332 (__mmask8) __U);
333}
334
335extern __inline void
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm512_store_pd (void *__P, __m512d __A)
338{
339 *(__m512d *) __P = __A;
340}
341
342extern __inline void
343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
344_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
345{
346 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
347 (__mmask8) __U);
348}
349
350extern __inline __m512
351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352_mm512_load_ps (void const *__P)
353{
354 return *(__m512 *) __P;
355}
356
357extern __inline __m512
358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
360{
361 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
362 (__v16sf) __W,
363 (__mmask16) __U);
364}
365
366extern __inline __m512
367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
369{
370 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
371 (__v16sf)
372 _mm512_setzero_ps (),
373 (__mmask16) __U);
374}
375
376extern __inline void
377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378_mm512_store_ps (void *__P, __m512 __A)
379{
380 *(__m512 *) __P = __A;
381}
382
383extern __inline void
384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
386{
387 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
388 (__mmask16) __U);
389}
390
391extern __inline __m512i
392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
393_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
394{
395 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
396 (__v8di) __W,
397 (__mmask8) __U);
398}
399
400extern __inline __m512i
401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
403{
404 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
405 (__v8di)
406 _mm512_setzero_si512 (),
407 (__mmask8) __U);
408}
409
410extern __inline __m512i
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm512_load_epi64 (void const *__P)
413{
414 return *(__m512i *) __P;
415}
416
417extern __inline __m512i
418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
420{
421 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
422 (__v8di) __W,
423 (__mmask8) __U);
424}
425
426extern __inline __m512i
427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
429{
430 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
431 (__v8di)
432 _mm512_setzero_si512 (),
433 (__mmask8) __U);
434}
435
436extern __inline void
437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438_mm512_store_epi64 (void *__P, __m512i __A)
439{
440 *(__m512i *) __P = __A;
441}
442
443extern __inline void
444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
445_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
446{
447 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
448 (__mmask8) __U);
449}
450
451extern __inline __m512i
452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
454{
455 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
456 (__v16si) __W,
457 (__mmask16) __U);
458}
459
460extern __inline __m512i
461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
462_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
463{
464 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
465 (__v16si)
466 _mm512_setzero_si512 (),
467 (__mmask16) __U);
468}
469
470extern __inline __m512i
471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472_mm512_load_si512 (void const *__P)
473{
474 return *(__m512i *) __P;
475}
476
477extern __inline __m512i
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm512_load_epi32 (void const *__P)
480{
481 return *(__m512i *) __P;
482}
483
484extern __inline __m512i
485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
487{
488 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
489 (__v16si) __W,
490 (__mmask16) __U);
491}
492
493extern __inline __m512i
494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
496{
497 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
498 (__v16si)
499 _mm512_setzero_si512 (),
500 (__mmask16) __U);
501}
502
503extern __inline void
504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505_mm512_store_si512 (void *__P, __m512i __A)
506{
507 *(__m512i *) __P = __A;
508}
509
510extern __inline void
511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512_mm512_store_epi32 (void *__P, __m512i __A)
513{
514 *(__m512i *) __P = __A;
515}
516
517extern __inline void
518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
519_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
520{
521 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
522 (__mmask16) __U);
523}
524
525extern __inline __m512i
526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527_mm512_mullo_epi32 (__m512i __A, __m512i __B)
528{
2069d6fc 529 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
530}
531
532extern __inline __m512i
533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
534_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
535{
536 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
537 (__v16si) __B,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 __M);
541}
542
543extern __inline __m512i
544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
545_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
546{
547 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
548 (__v16si) __B,
549 (__v16si) __W, __M);
550}
551
552extern __inline __m512i
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
555{
556 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
557 (__v16si) __Y,
558 (__v16si)
4271e5cb 559 _mm512_undefined_epi32 (),
756c5857
AI
560 (__mmask16) -1);
561}
562
563extern __inline __m512i
564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
566{
567 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
568 (__v16si) __Y,
569 (__v16si) __W,
570 (__mmask16) __U);
571}
572
573extern __inline __m512i
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
576{
577 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
578 (__v16si) __Y,
579 (__v16si)
580 _mm512_setzero_si512 (),
581 (__mmask16) __U);
582}
583
584extern __inline __m512i
585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586_mm512_srav_epi32 (__m512i __X, __m512i __Y)
587{
588 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
589 (__v16si) __Y,
590 (__v16si)
4271e5cb 591 _mm512_undefined_epi32 (),
756c5857
AI
592 (__mmask16) -1);
593}
594
595extern __inline __m512i
596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
598{
599 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
600 (__v16si) __Y,
601 (__v16si) __W,
602 (__mmask16) __U);
603}
604
605extern __inline __m512i
606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
608{
609 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
610 (__v16si) __Y,
611 (__v16si)
612 _mm512_setzero_si512 (),
613 (__mmask16) __U);
614}
615
616extern __inline __m512i
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
619{
620 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
621 (__v16si) __Y,
622 (__v16si)
4271e5cb 623 _mm512_undefined_epi32 (),
756c5857
AI
624 (__mmask16) -1);
625}
626
627extern __inline __m512i
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
630{
631 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
632 (__v16si) __Y,
633 (__v16si) __W,
634 (__mmask16) __U);
635}
636
637extern __inline __m512i
638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
639_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
640{
641 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
642 (__v16si) __Y,
643 (__v16si)
644 _mm512_setzero_si512 (),
645 (__mmask16) __U);
646}
647
648extern __inline __m512i
649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650_mm512_add_epi64 (__m512i __A, __m512i __B)
651{
2069d6fc 652 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
653}
654
655extern __inline __m512i
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
658{
659 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
660 (__v8di) __B,
661 (__v8di) __W,
662 (__mmask8) __U);
663}
664
665extern __inline __m512i
666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
667_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
668{
669 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
670 (__v8di) __B,
671 (__v8di)
672 _mm512_setzero_si512 (),
673 (__mmask8) __U);
674}
675
676extern __inline __m512i
677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678_mm512_sub_epi64 (__m512i __A, __m512i __B)
679{
2069d6fc 680 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
681}
682
683extern __inline __m512i
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
686{
687 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
688 (__v8di) __B,
689 (__v8di) __W,
690 (__mmask8) __U);
691}
692
693extern __inline __m512i
694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
695_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
696{
697 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
698 (__v8di) __B,
699 (__v8di)
700 _mm512_setzero_si512 (),
701 (__mmask8) __U);
702}
703
704extern __inline __m512i
705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
706_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
707{
708 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
709 (__v8di) __Y,
710 (__v8di)
0b192937 711 _mm512_undefined_pd (),
756c5857
AI
712 (__mmask8) -1);
713}
714
715extern __inline __m512i
716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
717_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
718{
719 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
720 (__v8di) __Y,
721 (__v8di) __W,
722 (__mmask8) __U);
723}
724
725extern __inline __m512i
726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
727_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
728{
729 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
730 (__v8di) __Y,
731 (__v8di)
732 _mm512_setzero_si512 (),
733 (__mmask8) __U);
734}
735
736extern __inline __m512i
737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
738_mm512_srav_epi64 (__m512i __X, __m512i __Y)
739{
740 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
741 (__v8di) __Y,
742 (__v8di)
4271e5cb 743 _mm512_undefined_epi32 (),
756c5857
AI
744 (__mmask8) -1);
745}
746
747extern __inline __m512i
748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
750{
751 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
752 (__v8di) __Y,
753 (__v8di) __W,
754 (__mmask8) __U);
755}
756
757extern __inline __m512i
758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
759_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
760{
761 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
762 (__v8di) __Y,
763 (__v8di)
764 _mm512_setzero_si512 (),
765 (__mmask8) __U);
766}
767
768extern __inline __m512i
769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
770_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
771{
772 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
773 (__v8di) __Y,
774 (__v8di)
4271e5cb 775 _mm512_undefined_epi32 (),
756c5857
AI
776 (__mmask8) -1);
777}
778
779extern __inline __m512i
780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
782{
783 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
784 (__v8di) __Y,
785 (__v8di) __W,
786 (__mmask8) __U);
787}
788
789extern __inline __m512i
790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
792{
793 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
794 (__v8di) __Y,
795 (__v8di)
796 _mm512_setzero_si512 (),
797 (__mmask8) __U);
798}
799
800extern __inline __m512i
801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802_mm512_add_epi32 (__m512i __A, __m512i __B)
803{
2069d6fc 804 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
805}
806
807extern __inline __m512i
808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
810{
811 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
812 (__v16si) __B,
813 (__v16si) __W,
814 (__mmask16) __U);
815}
816
817extern __inline __m512i
818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
820{
821 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
822 (__v16si) __B,
823 (__v16si)
824 _mm512_setzero_si512 (),
825 (__mmask16) __U);
826}
827
828extern __inline __m512i
829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830_mm512_mul_epi32 (__m512i __X, __m512i __Y)
831{
832 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
833 (__v16si) __Y,
834 (__v8di)
4271e5cb 835 _mm512_undefined_epi32 (),
756c5857
AI
836 (__mmask8) -1);
837}
838
839extern __inline __m512i
840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
842{
843 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
844 (__v16si) __Y,
845 (__v8di) __W, __M);
846}
847
848extern __inline __m512i
849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
851{
852 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
853 (__v16si) __Y,
854 (__v8di)
855 _mm512_setzero_si512 (),
856 __M);
857}
858
859extern __inline __m512i
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm512_sub_epi32 (__m512i __A, __m512i __B)
862{
2069d6fc 863 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
864}
865
866extern __inline __m512i
867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
869{
870 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
871 (__v16si) __B,
872 (__v16si) __W,
873 (__mmask16) __U);
874}
875
876extern __inline __m512i
877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
879{
880 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
881 (__v16si) __B,
882 (__v16si)
883 _mm512_setzero_si512 (),
884 (__mmask16) __U);
885}
886
887extern __inline __m512i
888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889_mm512_mul_epu32 (__m512i __X, __m512i __Y)
890{
891 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
892 (__v16si) __Y,
893 (__v8di)
4271e5cb 894 _mm512_undefined_epi32 (),
756c5857
AI
895 (__mmask8) -1);
896}
897
898extern __inline __m512i
899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
901{
902 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
903 (__v16si) __Y,
904 (__v8di) __W, __M);
905}
906
907extern __inline __m512i
908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
910{
911 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
912 (__v16si) __Y,
913 (__v8di)
914 _mm512_setzero_si512 (),
915 __M);
916}
917
918#ifdef __OPTIMIZE__
919extern __inline __m512i
920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921_mm512_slli_epi64 (__m512i __A, unsigned int __B)
922{
923 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
924 (__v8di)
4271e5cb 925 _mm512_undefined_epi32 (),
756c5857
AI
926 (__mmask8) -1);
927}
928
929extern __inline __m512i
930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
932 unsigned int __B)
933{
934 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
935 (__v8di) __W,
936 (__mmask8) __U);
937}
938
939extern __inline __m512i
940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
942{
943 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
944 (__v8di)
945 _mm512_setzero_si512 (),
946 (__mmask8) __U);
947}
948#else
/* Macro forms of the three slli_epi64 intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to a single psllqi512 mask builtin call;
   the count C is cast to int.  */
#define _mm512_slli_epi64(X, C)						      \
  ((__m512i) __builtin_ia32_psllqi512_mask (				      \
     (__v8di)(__m512i)(X), (int)(C),					      \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),			      \
     (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)				      \
  ((__m512i) __builtin_ia32_psllqi512_mask (				      \
     (__v8di)(__m512i)(X), (int)(C),					      \
     (__v8di)(__m512i)(W),						      \
     (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)				      \
  ((__m512i) __builtin_ia32_psllqi512_mask (				      \
     (__v8di)(__m512i)(X), (int)(C),					      \
     (__v8di)(__m512i)_mm512_setzero_si512 (),				      \
     (__mmask8)(U)))
963#endif
964
965extern __inline __m512i
966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967_mm512_sll_epi64 (__m512i __A, __m128i __B)
968{
969 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
970 (__v2di) __B,
971 (__v8di)
4271e5cb 972 _mm512_undefined_epi32 (),
756c5857
AI
973 (__mmask8) -1);
974}
975
976extern __inline __m512i
977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
979{
980 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
981 (__v2di) __B,
982 (__v8di) __W,
983 (__mmask8) __U);
984}
985
986extern __inline __m512i
987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
989{
990 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
991 (__v2di) __B,
992 (__v8di)
993 _mm512_setzero_si512 (),
994 (__mmask8) __U);
995}
996
997#ifdef __OPTIMIZE__
998extern __inline __m512i
999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1000_mm512_srli_epi64 (__m512i __A, unsigned int __B)
1001{
1002 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1003 (__v8di)
4271e5cb 1004 _mm512_undefined_epi32 (),
756c5857
AI
1005 (__mmask8) -1);
1006}
1007
1008extern __inline __m512i
1009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1011 __m512i __A, unsigned int __B)
1012{
1013 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1014 (__v8di) __W,
1015 (__mmask8) __U);
1016}
1017
1018extern __inline __m512i
1019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1020_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1021{
1022 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1023 (__v8di)
1024 _mm512_setzero_si512 (),
1025 (__mmask8) __U);
1026}
1027#else
/* Macro form of _mm512_srli_epi64: __builtin_ia32_psrlqi512_mask with an
   undefined extra vector operand and an all-ones mask.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)-1))
1032
/* Macro form of _mm512_mask_srli_epi64: W is the extra vector operand and
   U the mask handed to __builtin_ia32_psrlqi512_mask.  */
#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))
1037
/* Macro form of _mm512_maskz_srli_epi64: a zeroed vector is the extra
   operand and U the mask handed to __builtin_ia32_psrlqi512_mask.  */
#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
1042#endif
1043
1044extern __inline __m512i
1045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046_mm512_srl_epi64 (__m512i __A, __m128i __B)
1047{
1048 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1049 (__v2di) __B,
1050 (__v8di)
4271e5cb 1051 _mm512_undefined_epi32 (),
756c5857
AI
1052 (__mmask8) -1);
1053}
1054
1055extern __inline __m512i
1056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1057_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1058{
1059 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1060 (__v2di) __B,
1061 (__v8di) __W,
1062 (__mmask8) __U);
1063}
1064
1065extern __inline __m512i
1066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1067_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1068{
1069 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1070 (__v2di) __B,
1071 (__v8di)
1072 _mm512_setzero_si512 (),
1073 (__mmask8) __U);
1074}
1075
1076#ifdef __OPTIMIZE__
1077extern __inline __m512i
1078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1079_mm512_srai_epi64 (__m512i __A, unsigned int __B)
1080{
1081 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1082 (__v8di)
4271e5cb 1083 _mm512_undefined_epi32 (),
756c5857
AI
1084 (__mmask8) -1);
1085}
1086
1087extern __inline __m512i
1088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1090 unsigned int __B)
1091{
1092 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1093 (__v8di) __W,
1094 (__mmask8) __U);
1095}
1096
1097extern __inline __m512i
1098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1099_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1100{
1101 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1102 (__v8di)
1103 _mm512_setzero_si512 (),
1104 (__mmask8) __U);
1105}
1106#else
/* Macro form of _mm512_srai_epi64: __builtin_ia32_psraqi512_mask with an
   undefined extra vector operand and an all-ones mask.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)-1))
1111
/* Macro form of _mm512_mask_srai_epi64: W is the extra vector operand and
   U the mask handed to __builtin_ia32_psraqi512_mask.  */
#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))
1116
/* Macro form of _mm512_maskz_srai_epi64: a zeroed vector is the extra
   operand and U the mask handed to __builtin_ia32_psraqi512_mask.  */
#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
1121#endif
1122
1123extern __inline __m512i
1124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125_mm512_sra_epi64 (__m512i __A, __m128i __B)
1126{
1127 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1128 (__v2di) __B,
1129 (__v8di)
4271e5cb 1130 _mm512_undefined_epi32 (),
756c5857
AI
1131 (__mmask8) -1);
1132}
1133
1134extern __inline __m512i
1135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1136_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1137{
1138 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1139 (__v2di) __B,
1140 (__v8di) __W,
1141 (__mmask8) __U);
1142}
1143
1144extern __inline __m512i
1145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1146_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1147{
1148 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1149 (__v2di) __B,
1150 (__v8di)
1151 _mm512_setzero_si512 (),
1152 (__mmask8) __U);
1153}
1154
1155#ifdef __OPTIMIZE__
1156extern __inline __m512i
1157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1159{
1160 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1161 (__v16si)
4271e5cb 1162 _mm512_undefined_epi32 (),
756c5857
AI
1163 (__mmask16) -1);
1164}
1165
1166extern __inline __m512i
1167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1169 unsigned int __B)
1170{
1171 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1172 (__v16si) __W,
1173 (__mmask16) __U);
1174}
1175
1176extern __inline __m512i
1177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1179{
1180 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1181 (__v16si)
1182 _mm512_setzero_si512 (),
1183 (__mmask16) __U);
1184}
1185#else
/* Macro form of _mm512_slli_epi32: __builtin_ia32_pslldi512_mask with an
   undefined extra vector operand and an all-ones mask.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask16)-1))
1190
/* Macro form of _mm512_mask_slli_epi32: W is the extra vector operand and
   U the mask handed to __builtin_ia32_pslldi512_mask.  */
#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))
1195
/* Macro form of _mm512_maskz_slli_epi32: a zeroed vector is the extra
   operand and U the mask handed to __builtin_ia32_pslldi512_mask.  */
#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
1200#endif
1201
1202extern __inline __m512i
1203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204_mm512_sll_epi32 (__m512i __A, __m128i __B)
1205{
1206 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1207 (__v4si) __B,
1208 (__v16si)
4271e5cb 1209 _mm512_undefined_epi32 (),
756c5857
AI
1210 (__mmask16) -1);
1211}
1212
1213extern __inline __m512i
1214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1215_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1216{
1217 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1218 (__v4si) __B,
1219 (__v16si) __W,
1220 (__mmask16) __U);
1221}
1222
1223extern __inline __m512i
1224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1226{
1227 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1228 (__v4si) __B,
1229 (__v16si)
1230 _mm512_setzero_si512 (),
1231 (__mmask16) __U);
1232}
1233
1234#ifdef __OPTIMIZE__
1235extern __inline __m512i
1236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1238{
1239 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1240 (__v16si)
4271e5cb 1241 _mm512_undefined_epi32 (),
756c5857
AI
1242 (__mmask16) -1);
1243}
1244
1245extern __inline __m512i
1246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1248 __m512i __A, unsigned int __B)
1249{
1250 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1251 (__v16si) __W,
1252 (__mmask16) __U);
1253}
1254
1255extern __inline __m512i
1256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1257_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1258{
1259 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1260 (__v16si)
1261 _mm512_setzero_si512 (),
1262 (__mmask16) __U);
1263}
1264#else
/* Macro form of _mm512_srli_epi32: __builtin_ia32_psrldi512_mask with an
   undefined extra vector operand and an all-ones mask.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask16)-1))
1269
/* Macro form of _mm512_mask_srli_epi32: W is the extra vector operand and
   U the mask handed to __builtin_ia32_psrldi512_mask.  */
#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))
1274
/* Macro form of _mm512_maskz_srli_epi32: a zeroed vector is the extra
   operand and U the mask handed to __builtin_ia32_psrldi512_mask.  */
#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
1279#endif
1280
1281extern __inline __m512i
1282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283_mm512_srl_epi32 (__m512i __A, __m128i __B)
1284{
1285 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1286 (__v4si) __B,
1287 (__v16si)
4271e5cb 1288 _mm512_undefined_epi32 (),
756c5857
AI
1289 (__mmask16) -1);
1290}
1291
1292extern __inline __m512i
1293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1295{
1296 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1297 (__v4si) __B,
1298 (__v16si) __W,
1299 (__mmask16) __U);
1300}
1301
1302extern __inline __m512i
1303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1305{
1306 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1307 (__v4si) __B,
1308 (__v16si)
1309 _mm512_setzero_si512 (),
1310 (__mmask16) __U);
1311}
1312
1313#ifdef __OPTIMIZE__
1314extern __inline __m512i
1315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1316_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1317{
1318 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1319 (__v16si)
4271e5cb 1320 _mm512_undefined_epi32 (),
756c5857
AI
1321 (__mmask16) -1);
1322}
1323
1324extern __inline __m512i
1325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1327 unsigned int __B)
1328{
1329 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1330 (__v16si) __W,
1331 (__mmask16) __U);
1332}
1333
1334extern __inline __m512i
1335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1337{
1338 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1339 (__v16si)
1340 _mm512_setzero_si512 (),
1341 (__mmask16) __U);
1342}
1343#else
/* Macro form of _mm512_srai_epi32: __builtin_ia32_psradi512_mask with an
   undefined extra vector operand and an all-ones mask.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask16)-1))
1348
/* Macro form of _mm512_mask_srai_epi32: W is the extra vector operand and
   U the mask handed to __builtin_ia32_psradi512_mask.  */
#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))
1353
/* Macro form of _mm512_maskz_srai_epi32: a zeroed vector is the extra
   operand and U the mask handed to __builtin_ia32_psradi512_mask.  */
#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
1358#endif
1359
1360extern __inline __m512i
1361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362_mm512_sra_epi32 (__m512i __A, __m128i __B)
1363{
1364 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1365 (__v4si) __B,
1366 (__v16si)
4271e5cb 1367 _mm512_undefined_epi32 (),
756c5857
AI
1368 (__mmask16) -1);
1369}
1370
1371extern __inline __m512i
1372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1373_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1374{
1375 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1376 (__v4si) __B,
1377 (__v16si) __W,
1378 (__mmask16) __U);
1379}
1380
1381extern __inline __m512i
1382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1383_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1384{
1385 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1386 (__v4si) __B,
1387 (__v16si)
1388 _mm512_setzero_si512 (),
1389 (__mmask16) __U);
1390}
1391
075691af
AI
1392#ifdef __OPTIMIZE__
1393extern __inline __m128d
1394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1395_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1396{
1397 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1398 (__v2df) __B,
1399 __R);
1400}
1401
1402extern __inline __m128
1403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1404_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1405{
1406 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1407 (__v4sf) __B,
1408 __R);
1409}
1410
1411extern __inline __m128d
1412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1413_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1414{
1415 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1416 (__v2df) __B,
1417 __R);
1418}
1419
1420extern __inline __m128
1421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1423{
1424 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1425 (__v4sf) __B,
1426 __R);
1427}
1428
1429#else
/* Macro form of _mm_add_round_sd.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call, and each argument is
   parenthesized and cast the same way the inline version casts its
   parameters.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))
1432
/* Macro form of _mm_add_round_ss.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call, and each argument is
   parenthesized and cast the same way the inline version casts its
   parameters.  */
#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))
1435
/* Macro form of _mm_sub_round_sd.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call, and each argument is
   parenthesized and cast the same way the inline version casts its
   parameters.  */
#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))
1438
/* Macro form of _mm_sub_round_ss.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call, and each argument is
   parenthesized and cast the same way the inline version casts its
   parameters.  */
#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))
1441#endif
1442
756c5857
AI
1443#ifdef __OPTIMIZE__
1444extern __inline __m512i
1445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1446_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1447 const int __imm)
756c5857
AI
1448{
1449 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1450 (__v8di) __B,
b5fd0b71 1451 (__v8di) __C, __imm,
756c5857
AI
1452 (__mmask8) -1);
1453}
1454
1455extern __inline __m512i
1456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
b5fd0b71 1458 __m512i __C, const int __imm)
756c5857
AI
1459{
1460 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1461 (__v8di) __B,
b5fd0b71 1462 (__v8di) __C, __imm,
756c5857
AI
1463 (__mmask8) __U);
1464}
1465
1466extern __inline __m512i
1467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1468_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
b5fd0b71 1469 __m512i __C, const int __imm)
756c5857
AI
1470{
1471 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1472 (__v8di) __B,
1473 (__v8di) __C,
b5fd0b71 1474 __imm, (__mmask8) __U);
756c5857
AI
1475}
1476
1477extern __inline __m512i
1478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1479_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1480 const int __imm)
756c5857
AI
1481{
1482 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1483 (__v16si) __B,
1484 (__v16si) __C,
b5fd0b71 1485 __imm, (__mmask16) -1);
756c5857
AI
1486}
1487
1488extern __inline __m512i
1489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
b5fd0b71 1491 __m512i __C, const int __imm)
756c5857
AI
1492{
1493 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1494 (__v16si) __B,
1495 (__v16si) __C,
b5fd0b71 1496 __imm, (__mmask16) __U);
756c5857
AI
1497}
1498
1499extern __inline __m512i
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
b5fd0b71 1502 __m512i __C, const int __imm)
756c5857
AI
1503{
1504 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1505 (__v16si) __B,
1506 (__v16si) __C,
b5fd0b71 1507 __imm, (__mmask16) __U);
756c5857
AI
1508}
1509#else
/* Macro form of _mm512_ternarylogic_epi64: all-ones mask.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), \
    (__mmask8)-1))
/* Macro form of _mm512_mask_ternarylogic_epi64: U is the mask.  */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), \
    (__mmask8)(U)))
/* Macro form of _mm512_maskz_ternarylogic_epi64: uses the _maskz builtin
   with U as the mask.  */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), \
    (__mmask8)(U)))
/* Macro form of _mm512_ternarylogic_epi32: all-ones mask.  */
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), \
    (__mmask16)-1))
/* Macro form of _mm512_mask_ternarylogic_epi32: U is the mask.  */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), \
    (__mmask16)(U)))
/* Macro form of _mm512_maskz_ternarylogic_epi32: uses the _maskz builtin
   with U as the mask.  */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), \
    (__mmask16)(U)))
1531#endif
1532
1533extern __inline __m512d
1534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535_mm512_rcp14_pd (__m512d __A)
1536{
1537 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1538 (__v8df)
0b192937 1539 _mm512_undefined_pd (),
756c5857
AI
1540 (__mmask8) -1);
1541}
1542
1543extern __inline __m512d
1544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1546{
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df) __W,
1549 (__mmask8) __U);
1550}
1551
1552extern __inline __m512d
1553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1554_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1555{
1556 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1557 (__v8df)
1558 _mm512_setzero_pd (),
1559 (__mmask8) __U);
1560}
1561
1562extern __inline __m512
1563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564_mm512_rcp14_ps (__m512 __A)
1565{
1566 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1567 (__v16sf)
0b192937 1568 _mm512_undefined_ps (),
756c5857
AI
1569 (__mmask16) -1);
1570}
1571
1572extern __inline __m512
1573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1575{
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf) __W,
1578 (__mmask16) __U);
1579}
1580
1581extern __inline __m512
1582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1584{
1585 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1586 (__v16sf)
1587 _mm512_setzero_ps (),
1588 (__mmask16) __U);
1589}
1590
075691af
AI
1591extern __inline __m128d
1592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593_mm_rcp14_sd (__m128d __A, __m128d __B)
1594{
df62b4af
IT
1595 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1596 (__v2df) __A);
075691af
AI
1597}
1598
1599extern __inline __m128
1600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601_mm_rcp14_ss (__m128 __A, __m128 __B)
1602{
df62b4af
IT
1603 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1604 (__v4sf) __A);
075691af
AI
1605}
1606
756c5857
AI
1607extern __inline __m512d
1608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609_mm512_rsqrt14_pd (__m512d __A)
1610{
1611 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1612 (__v8df)
0b192937 1613 _mm512_undefined_pd (),
756c5857
AI
1614 (__mmask8) -1);
1615}
1616
1617extern __inline __m512d
1618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1620{
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df) __W,
1623 (__mmask8) __U);
1624}
1625
1626extern __inline __m512d
1627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1628_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1629{
1630 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1631 (__v8df)
1632 _mm512_setzero_pd (),
1633 (__mmask8) __U);
1634}
1635
1636extern __inline __m512
1637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1638_mm512_rsqrt14_ps (__m512 __A)
1639{
1640 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1641 (__v16sf)
0b192937 1642 _mm512_undefined_ps (),
756c5857
AI
1643 (__mmask16) -1);
1644}
1645
1646extern __inline __m512
1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1649{
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf) __W,
1652 (__mmask16) __U);
1653}
1654
1655extern __inline __m512
1656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1658{
1659 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1660 (__v16sf)
1661 _mm512_setzero_ps (),
1662 (__mmask16) __U);
1663}
1664
075691af
AI
1665extern __inline __m128d
1666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1668{
df62b4af
IT
1669 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1670 (__v2df) __A);
075691af
AI
1671}
1672
1673extern __inline __m128
1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1676{
df62b4af
IT
1677 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1678 (__v4sf) __A);
075691af
AI
1679}
1680
756c5857
AI
1681#ifdef __OPTIMIZE__
1682extern __inline __m512d
1683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1684_mm512_sqrt_round_pd (__m512d __A, const int __R)
1685{
1686 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1687 (__v8df)
0b192937 1688 _mm512_undefined_pd (),
756c5857
AI
1689 (__mmask8) -1, __R);
1690}
1691
1692extern __inline __m512d
1693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1695 const int __R)
1696{
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df) __W,
1699 (__mmask8) __U, __R);
1700}
1701
1702extern __inline __m512d
1703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1704_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1705{
1706 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1707 (__v8df)
1708 _mm512_setzero_pd (),
1709 (__mmask8) __U, __R);
1710}
1711
1712extern __inline __m512
1713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714_mm512_sqrt_round_ps (__m512 __A, const int __R)
1715{
1716 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1717 (__v16sf)
0b192937 1718 _mm512_undefined_ps (),
756c5857
AI
1719 (__mmask16) -1, __R);
1720}
1721
1722extern __inline __m512
1723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1725{
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf) __W,
1728 (__mmask16) __U, __R);
1729}
1730
1731extern __inline __m512
1732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1734{
1735 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1736 (__v16sf)
1737 _mm512_setzero_ps (),
1738 (__mmask16) __U, __R);
1739}
1740
075691af
AI
1741extern __inline __m128d
1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1744{
1745 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1746 (__v2df) __A,
1747 __R);
1748}
1749
1750extern __inline __m128
1751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1753{
1754 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1755 (__v4sf) __A,
1756 __R);
1757}
756c5857
AI
1758#else
/* Macro form of _mm512_sqrt_round_pd: undefined extra vector operand and an
   all-ones mask.  The expansion is fully parenthesized and the arguments
   are parenthesized and cast, so the macro composes safely inside larger
   expressions.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_undefined_pd (), \
    (__mmask8)-1, \
    (int)(C)))
756c5857
AI
1761
/* Macro form of _mm512_mask_sqrt_round_pd: W is the extra vector operand
   and U the mask.  The expansion is fully parenthesized and the arguments
   are parenthesized and cast, so the macro composes safely inside larger
   expressions.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    (int)(C)))
1764
/* Macro form of _mm512_maskz_sqrt_round_pd: a zeroed vector is the extra
   operand and U the mask.  The expansion is fully parenthesized and the
   arguments are parenthesized and cast, so the macro composes safely
   inside larger expressions.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_setzero_pd (), \
    (__mmask8)(U), \
    (int)(C)))
1767
/* Macro form of _mm512_sqrt_round_ps: undefined extra vector operand and an
   all-ones mask.  The expansion is fully parenthesized and the arguments
   are parenthesized and cast, so the macro composes safely inside larger
   expressions.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_undefined_ps (), \
    (__mmask16)-1, \
    (int)(C)))
756c5857
AI
1770
/* Macro form of _mm512_mask_sqrt_round_ps: W is the extra vector operand
   and U the mask.  The expansion is fully parenthesized and the arguments
   are parenthesized and cast, so the macro composes safely inside larger
   expressions.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    (int)(C)))
1773
/* Macro form of _mm512_maskz_sqrt_round_ps: a zeroed vector is the extra
   operand and U the mask.  The expansion is fully parenthesized and the
   arguments are parenthesized and cast, so the macro composes safely
   inside larger expressions.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_setzero_ps (), \
    (__mmask16)(U), \
    (int)(C)))
075691af
AI
1776
/* Macro form of _mm_sqrt_round_sd.  The builtin must receive B first and A
   second, exactly as the inline definition of _mm_sqrt_round_sd passes
   (__B, __A); the previous (A, B) order made the result depend on whether
   __OPTIMIZE__ was defined.  The expansion is also fully parenthesized and
   the arguments are parenthesized and cast.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(A), \
                                          (int)(C)))
1779
/* Macro form of _mm_sqrt_round_ss.  The builtin must receive B first and A
   second, exactly as the inline definition of _mm_sqrt_round_ss passes
   (__B, __A); the previous (A, B) order made the result depend on whether
   __OPTIMIZE__ was defined.  The expansion is also fully parenthesized and
   the arguments are parenthesized and cast.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(A), \
                                         (int)(C)))
756c5857
AI
1782#endif
1783
1784extern __inline __m512i
1785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786_mm512_cvtepi8_epi32 (__m128i __A)
1787{
1788 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1789 (__v16si)
4271e5cb 1790 _mm512_undefined_epi32 (),
756c5857
AI
1791 (__mmask16) -1);
1792}
1793
1794extern __inline __m512i
1795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1797{
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si) __W,
1800 (__mmask16) __U);
1801}
1802
1803extern __inline __m512i
1804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1806{
1807 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1808 (__v16si)
1809 _mm512_setzero_si512 (),
1810 (__mmask16) __U);
1811}
1812
1813extern __inline __m512i
1814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815_mm512_cvtepi8_epi64 (__m128i __A)
1816{
1817 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1818 (__v8di)
4271e5cb 1819 _mm512_undefined_epi32 (),
756c5857
AI
1820 (__mmask8) -1);
1821}
1822
1823extern __inline __m512i
1824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1826{
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di) __W,
1829 (__mmask8) __U);
1830}
1831
1832extern __inline __m512i
1833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1835{
1836 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1837 (__v8di)
1838 _mm512_setzero_si512 (),
1839 (__mmask8) __U);
1840}
1841
1842extern __inline __m512i
1843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1844_mm512_cvtepi16_epi32 (__m256i __A)
1845{
1846 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1847 (__v16si)
4271e5cb 1848 _mm512_undefined_epi32 (),
756c5857
AI
1849 (__mmask16) -1);
1850}
1851
1852extern __inline __m512i
1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1855{
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si) __W,
1858 (__mmask16) __U);
1859}
1860
1861extern __inline __m512i
1862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1864{
1865 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1866 (__v16si)
1867 _mm512_setzero_si512 (),
1868 (__mmask16) __U);
1869}
1870
1871extern __inline __m512i
1872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1873_mm512_cvtepi16_epi64 (__m128i __A)
1874{
1875 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1876 (__v8di)
4271e5cb 1877 _mm512_undefined_epi32 (),
756c5857
AI
1878 (__mmask8) -1);
1879}
1880
1881extern __inline __m512i
1882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1884{
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di) __W,
1887 (__mmask8) __U);
1888}
1889
1890extern __inline __m512i
1891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1893{
1894 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1895 (__v8di)
1896 _mm512_setzero_si512 (),
1897 (__mmask8) __U);
1898}
1899
1900extern __inline __m512i
1901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1902_mm512_cvtepi32_epi64 (__m256i __X)
1903{
1904 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1905 (__v8di)
4271e5cb 1906 _mm512_undefined_epi32 (),
756c5857
AI
1907 (__mmask8) -1);
1908}
1909
1910extern __inline __m512i
1911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1913{
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di) __W,
1916 (__mmask8) __U);
1917}
1918
1919extern __inline __m512i
1920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1921_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1922{
1923 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1924 (__v8di)
1925 _mm512_setzero_si512 (),
1926 (__mmask8) __U);
1927}
1928
1929extern __inline __m512i
1930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931_mm512_cvtepu8_epi32 (__m128i __A)
1932{
1933 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1934 (__v16si)
4271e5cb 1935 _mm512_undefined_epi32 (),
756c5857
AI
1936 (__mmask16) -1);
1937}
1938
1939extern __inline __m512i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1942{
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si) __W,
1945 (__mmask16) __U);
1946}
1947
1948extern __inline __m512i
1949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1950_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1951{
1952 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1953 (__v16si)
1954 _mm512_setzero_si512 (),
1955 (__mmask16) __U);
1956}
1957
1958extern __inline __m512i
1959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960_mm512_cvtepu8_epi64 (__m128i __A)
1961{
1962 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1963 (__v8di)
4271e5cb 1964 _mm512_undefined_epi32 (),
756c5857
AI
1965 (__mmask8) -1);
1966}
1967
1968extern __inline __m512i
1969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1971{
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di) __W,
1974 (__mmask8) __U);
1975}
1976
1977extern __inline __m512i
1978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1980{
1981 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1982 (__v8di)
1983 _mm512_setzero_si512 (),
1984 (__mmask8) __U);
1985}
1986
1987extern __inline __m512i
1988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989_mm512_cvtepu16_epi32 (__m256i __A)
1990{
1991 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1992 (__v16si)
4271e5cb 1993 _mm512_undefined_epi32 (),
756c5857
AI
1994 (__mmask16) -1);
1995}
1996
1997extern __inline __m512i
1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2000{
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si) __W,
2003 (__mmask16) __U);
2004}
2005
2006extern __inline __m512i
2007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2009{
2010 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2011 (__v16si)
2012 _mm512_setzero_si512 (),
2013 (__mmask16) __U);
2014}
2015
2016extern __inline __m512i
2017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018_mm512_cvtepu16_epi64 (__m128i __A)
2019{
2020 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2021 (__v8di)
4271e5cb 2022 _mm512_undefined_epi32 (),
756c5857
AI
2023 (__mmask8) -1);
2024}
2025
2026extern __inline __m512i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2029{
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di) __W,
2032 (__mmask8) __U);
2033}
2034
2035extern __inline __m512i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2038{
2039 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2040 (__v8di)
2041 _mm512_setzero_si512 (),
2042 (__mmask8) __U);
2043}
2044
2045extern __inline __m512i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm512_cvtepu32_epi64 (__m256i __X)
2048{
2049 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2050 (__v8di)
4271e5cb 2051 _mm512_undefined_epi32 (),
756c5857
AI
2052 (__mmask8) -1);
2053}
2054
2055extern __inline __m512i
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2058{
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di) __W,
2061 (__mmask8) __U);
2062}
2063
2064extern __inline __m512i
2065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2066_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2067{
2068 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2069 (__v8di)
2070 _mm512_setzero_si512 (),
2071 (__mmask8) __U);
2072}
2073
2074#ifdef __OPTIMIZE__
2075extern __inline __m512d
2076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2078{
2079 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2080 (__v8df) __B,
2081 (__v8df)
0b192937 2082 _mm512_undefined_pd (),
756c5857
AI
2083 (__mmask8) -1, __R);
2084}
2085
2086extern __inline __m512d
2087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2088_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2089 __m512d __B, const int __R)
2090{
2091 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2092 (__v8df) __B,
2093 (__v8df) __W,
2094 (__mmask8) __U, __R);
2095}
2096
2097extern __inline __m512d
2098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2099_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2100 const int __R)
2101{
2102 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2103 (__v8df) __B,
2104 (__v8df)
2105 _mm512_setzero_pd (),
2106 (__mmask8) __U, __R);
2107}
2108
2109extern __inline __m512
2110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2112{
2113 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2114 (__v16sf) __B,
2115 (__v16sf)
0b192937 2116 _mm512_undefined_ps (),
756c5857
AI
2117 (__mmask16) -1, __R);
2118}
2119
2120extern __inline __m512
2121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2123 __m512 __B, const int __R)
2124{
2125 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2126 (__v16sf) __B,
2127 (__v16sf) __W,
2128 (__mmask16) __U, __R);
2129}
2130
2131extern __inline __m512
2132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2134{
2135 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2136 (__v16sf) __B,
2137 (__v16sf)
2138 _mm512_setzero_ps (),
2139 (__mmask16) __U, __R);
2140}
2141
2142extern __inline __m512d
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2145{
2146 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2147 (__v8df) __B,
2148 (__v8df)
0b192937 2149 _mm512_undefined_pd (),
756c5857
AI
2150 (__mmask8) -1, __R);
2151}
2152
2153extern __inline __m512d
2154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2155_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2156 __m512d __B, const int __R)
2157{
2158 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2159 (__v8df) __B,
2160 (__v8df) __W,
2161 (__mmask8) __U, __R);
2162}
2163
2164extern __inline __m512d
2165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2166_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2167 const int __R)
2168{
2169 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2170 (__v8df) __B,
2171 (__v8df)
2172 _mm512_setzero_pd (),
2173 (__mmask8) __U, __R);
2174}
2175
2176extern __inline __m512
2177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2178_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2179{
2180 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2181 (__v16sf) __B,
2182 (__v16sf)
0b192937 2183 _mm512_undefined_ps (),
756c5857
AI
2184 (__mmask16) -1, __R);
2185}
2186
2187extern __inline __m512
2188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2190 __m512 __B, const int __R)
2191{
2192 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2193 (__v16sf) __B,
2194 (__v16sf) __W,
2195 (__mmask16) __U, __R);
2196}
2197
2198extern __inline __m512
2199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2200_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2201{
2202 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2203 (__v16sf) __B,
2204 (__v16sf)
2205 _mm512_setzero_ps (),
2206 (__mmask16) __U, __R);
2207}
2208#else
/* Macro forms of the _mm512_*add_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result, as it does with the inline
   functions, instead of binding to the builtin call under the cast.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2217
/* Macro forms of the _mm512_*add_round_ps intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2226
/* Macro forms of the _mm512_*sub_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2235
/* Macro forms of the _mm512_*sub_round_ps intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2244#endif
2245
2246#ifdef __OPTIMIZE__
2247extern __inline __m512d
2248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2250{
2251 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2252 (__v8df) __B,
2253 (__v8df)
0b192937 2254 _mm512_undefined_pd (),
756c5857
AI
2255 (__mmask8) -1, __R);
2256}
2257
2258extern __inline __m512d
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2261 __m512d __B, const int __R)
2262{
2263 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2264 (__v8df) __B,
2265 (__v8df) __W,
2266 (__mmask8) __U, __R);
2267}
2268
2269extern __inline __m512d
2270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2271_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2272 const int __R)
2273{
2274 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2275 (__v8df) __B,
2276 (__v8df)
2277 _mm512_setzero_pd (),
2278 (__mmask8) __U, __R);
2279}
2280
2281extern __inline __m512
2282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2283_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2284{
2285 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2286 (__v16sf) __B,
2287 (__v16sf)
0b192937 2288 _mm512_undefined_ps (),
756c5857
AI
2289 (__mmask16) -1, __R);
2290}
2291
2292extern __inline __m512
2293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2295 __m512 __B, const int __R)
2296{
2297 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2298 (__v16sf) __B,
2299 (__v16sf) __W,
2300 (__mmask16) __U, __R);
2301}
2302
2303extern __inline __m512
2304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2306{
2307 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2308 (__v16sf) __B,
2309 (__v16sf)
2310 _mm512_setzero_ps (),
2311 (__mmask16) __U, __R);
2312}
2313
2314extern __inline __m512d
2315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2316_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2317{
2318 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2319 (__v8df) __V,
2320 (__v8df)
0b192937 2321 _mm512_undefined_pd (),
756c5857
AI
2322 (__mmask8) -1, __R);
2323}
2324
2325extern __inline __m512d
2326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2328 __m512d __V, const int __R)
2329{
2330 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2331 (__v8df) __V,
2332 (__v8df) __W,
2333 (__mmask8) __U, __R);
2334}
2335
2336extern __inline __m512d
2337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2338_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2339 const int __R)
2340{
2341 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2342 (__v8df) __V,
2343 (__v8df)
2344 _mm512_setzero_pd (),
2345 (__mmask8) __U, __R);
2346}
2347
2348extern __inline __m512
2349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2350_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2351{
2352 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2353 (__v16sf) __B,
2354 (__v16sf)
0b192937 2355 _mm512_undefined_ps (),
756c5857
AI
2356 (__mmask16) -1, __R);
2357}
2358
2359extern __inline __m512
2360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2361_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2362 __m512 __B, const int __R)
2363{
2364 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2365 (__v16sf) __B,
2366 (__v16sf) __W,
2367 (__mmask16) __U, __R);
2368}
2369
2370extern __inline __m512
2371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2373{
2374 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2375 (__v16sf) __B,
2376 (__v16sf)
2377 _mm512_setzero_ps (),
2378 (__mmask16) __U, __R);
2379}
2380
075691af
AI
2381extern __inline __m128d
2382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2384{
2385 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2386 (__v2df) __B,
2387 __R);
2388}
2389
2390extern __inline __m128
2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2393{
2394 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2395 (__v4sf) __B,
2396 __R);
2397}
2398
2399extern __inline __m128d
2400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2402{
2403 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2404 (__v2df) __B,
2405 __R);
2406}
2407
2408extern __inline __m128
2409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2411{
2412 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2413 (__v4sf) __B,
2414 __R);
2415}
2416
756c5857
AI
2417#else
/* Macro forms of the _mm512_*mul_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2426
/* Macro forms of the _mm512_*mul_round_ps intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2435
/* Macro forms of the _mm512_*div_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2444
/* Macro forms of the _mm512_*div_round_ps intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
2453
/* Macro forms of the scalar mul/div rounding intrinsics.  The expansion
   and every argument are parenthesized so that an operator written next
   to the macro call applies to the cast result rather than to the builtin
   call under the cast.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))

#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))

#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))

#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))
756c5857
AI
2465#endif
2466
2467#ifdef __OPTIMIZE__
2468extern __inline __m512d
2469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2471{
2472 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2473 (__v8df) __B,
2474 (__v8df)
0b192937 2475 _mm512_undefined_pd (),
756c5857
AI
2476 (__mmask8) -1, __R);
2477}
2478
2479extern __inline __m512d
2480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2482 __m512d __B, const int __R)
2483{
2484 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2485 (__v8df) __B,
2486 (__v8df) __W,
2487 (__mmask8) __U, __R);
2488}
2489
2490extern __inline __m512d
2491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2493 const int __R)
2494{
2495 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2496 (__v8df) __B,
2497 (__v8df)
2498 _mm512_setzero_pd (),
2499 (__mmask8) __U, __R);
2500}
2501
2502extern __inline __m512
2503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2504_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2505{
2506 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2507 (__v16sf) __B,
2508 (__v16sf)
0b192937 2509 _mm512_undefined_ps (),
756c5857
AI
2510 (__mmask16) -1, __R);
2511}
2512
2513extern __inline __m512
2514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2516 __m512 __B, const int __R)
2517{
2518 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2519 (__v16sf) __B,
2520 (__v16sf) __W,
2521 (__mmask16) __U, __R);
2522}
2523
2524extern __inline __m512
2525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2527{
2528 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2529 (__v16sf) __B,
2530 (__v16sf)
2531 _mm512_setzero_ps (),
2532 (__mmask16) __U, __R);
2533}
2534
2535extern __inline __m512d
2536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2538{
2539 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2540 (__v8df) __B,
2541 (__v8df)
0b192937 2542 _mm512_undefined_pd (),
756c5857
AI
2543 (__mmask8) -1, __R);
2544}
2545
2546extern __inline __m512d
2547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2549 __m512d __B, const int __R)
2550{
2551 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2552 (__v8df) __B,
2553 (__v8df) __W,
2554 (__mmask8) __U, __R);
2555}
2556
2557extern __inline __m512d
2558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2560 const int __R)
2561{
2562 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2563 (__v8df) __B,
2564 (__v8df)
2565 _mm512_setzero_pd (),
2566 (__mmask8) __U, __R);
2567}
2568
2569extern __inline __m512
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2572{
2573 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2574 (__v16sf) __B,
2575 (__v16sf)
0b192937 2576 _mm512_undefined_ps (),
756c5857
AI
2577 (__mmask16) -1, __R);
2578}
2579
2580extern __inline __m512
2581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2583 __m512 __B, const int __R)
2584{
2585 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2586 (__v16sf) __B,
2587 (__v16sf) __W,
2588 (__mmask16) __U, __R);
2589}
2590
2591extern __inline __m512
2592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2594{
2595 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2596 (__v16sf) __B,
2597 (__v16sf)
2598 _mm512_setzero_ps (),
2599 (__mmask16) __U, __R);
2600}
2601#else
/* Macro forms of the _mm512_*max_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (R)))
2610
/* Macro form of _mm512_max_round_ps.  The third operand comes from
   _mm512_undefined_ps (), matching the inline-function definition and the
   other single-precision macros (it was previously taken from
   _mm512_undefined_pd () and reinterpreted).  The expansion is fully
   parenthesized so adjacent operators apply to the cast result.  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (R)))
756c5857
AI
2613
/* Macro forms of the masked _mm512_*max_round_ps intrinsics.  The
   expansion and every argument are parenthesized so that an operator
   written next to the macro call applies to the cast result rather than
   to the builtin call under the cast.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (R)))
2619
/* Macro forms of the _mm512_*min_round_pd intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (R)))
2628
/* Macro forms of the _mm512_*min_round_ps intrinsics.  The expansion and
   every argument are parenthesized so that an operator written next to the
   macro call applies to the cast result rather than to the builtin call
   under the cast.  */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (R)))
2637#endif
2638
2639#ifdef __OPTIMIZE__
2640extern __inline __m512d
2641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2643{
2644 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2645 (__v8df) __B,
2646 (__v8df)
0b192937 2647 _mm512_undefined_pd (),
756c5857
AI
2648 (__mmask8) -1, __R);
2649}
2650
2651extern __inline __m512d
2652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2654 __m512d __B, const int __R)
2655{
2656 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2657 (__v8df) __B,
2658 (__v8df) __W,
2659 (__mmask8) __U, __R);
2660}
2661
2662extern __inline __m512d
2663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2665 const int __R)
2666{
2667 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2668 (__v8df) __B,
2669 (__v8df)
2670 _mm512_setzero_pd (),
2671 (__mmask8) __U, __R);
2672}
2673
2674extern __inline __m512
2675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2676_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2677{
2678 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2679 (__v16sf) __B,
2680 (__v16sf)
0b192937 2681 _mm512_undefined_ps (),
756c5857
AI
2682 (__mmask16) -1, __R);
2683}
2684
2685extern __inline __m512
2686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2688 __m512 __B, const int __R)
2689{
2690 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2691 (__v16sf) __B,
2692 (__v16sf) __W,
2693 (__mmask16) __U, __R);
2694}
2695
2696extern __inline __m512
2697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2699 const int __R)
2700{
2701 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2702 (__v16sf) __B,
2703 (__v16sf)
2704 _mm512_setzero_ps (),
2705 (__mmask16) __U, __R);
2706}
2707
075691af
AI
2708extern __inline __m128d
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2711{
2712 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2713 (__v2df) __B,
2714 __R);
2715}
2716
2717extern __inline __m128
2718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2720{
2721 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2722 (__v4sf) __B,
2723 __R);
2724}
756c5857
AI
2725#else
/* Macro forms of the _mm512_*scalef_round_pd intrinsics.  The expansion
   and every argument are parenthesized so that an operator written next
   to the macro call applies to the cast result rather than to the builtin
   call under the cast.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2734
/* Macro forms of the _mm512_*scalef_round_ps intrinsics.  The expansion
   and every argument are parenthesized so that an operator written next
   to the macro call applies to the cast result rather than to the builtin
   call under the cast.  */
#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
2743
/* Macro forms of the scalar scalef rounding intrinsics.  The expansion
   and every argument are parenthesized so that an operator written next
   to the macro call applies to the cast result rather than to the builtin
   call under the cast.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_round ((A), (B), (C)))

#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_scalefss_round ((A), (B), (C)))
756c5857
AI
2749#endif
2750
2751#ifdef __OPTIMIZE__
2752extern __inline __m512d
2753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2754_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2755{
2756 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2757 (__v8df) __B,
2758 (__v8df) __C,
2759 (__mmask8) -1, __R);
2760}
2761
2762extern __inline __m512d
2763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2764_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2765 __m512d __C, const int __R)
2766{
2767 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2768 (__v8df) __B,
2769 (__v8df) __C,
2770 (__mmask8) __U, __R);
2771}
2772
2773extern __inline __m512d
2774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2775_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2776 __mmask8 __U, const int __R)
2777{
2778 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2779 (__v8df) __B,
2780 (__v8df) __C,
2781 (__mmask8) __U, __R);
2782}
2783
2784extern __inline __m512d
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2787 __m512d __C, const int __R)
2788{
2789 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2790 (__v8df) __B,
2791 (__v8df) __C,
2792 (__mmask8) __U, __R);
2793}
2794
2795extern __inline __m512
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2798{
2799 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2800 (__v16sf) __B,
2801 (__v16sf) __C,
2802 (__mmask16) -1, __R);
2803}
2804
2805extern __inline __m512
2806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2808 __m512 __C, const int __R)
2809{
2810 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2811 (__v16sf) __B,
2812 (__v16sf) __C,
2813 (__mmask16) __U, __R);
2814}
2815
2816extern __inline __m512
2817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2818_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2819 __mmask16 __U, const int __R)
2820{
2821 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2822 (__v16sf) __B,
2823 (__v16sf) __C,
2824 (__mmask16) __U, __R);
2825}
2826
2827extern __inline __m512
2828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2829_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2830 __m512 __C, const int __R)
2831{
2832 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2833 (__v16sf) __B,
2834 (__v16sf) __C,
2835 (__mmask16) __U, __R);
2836}
2837
2838extern __inline __m512d
2839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2840_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2841{
2842 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2843 (__v8df) __B,
2844 -(__v8df) __C,
2845 (__mmask8) -1, __R);
2846}
2847
2848extern __inline __m512d
2849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2850_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2851 __m512d __C, const int __R)
2852{
2853 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2854 (__v8df) __B,
2855 -(__v8df) __C,
2856 (__mmask8) __U, __R);
2857}
2858
2859extern __inline __m512d
2860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2862 __mmask8 __U, const int __R)
2863{
2864 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2865 (__v8df) __B,
2866 (__v8df) __C,
2867 (__mmask8) __U, __R);
2868}
2869
2870extern __inline __m512d
2871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2873 __m512d __C, const int __R)
2874{
2875 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2876 (__v8df) __B,
2877 -(__v8df) __C,
2878 (__mmask8) __U, __R);
2879}
2880
2881extern __inline __m512
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2884{
2885 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2886 (__v16sf) __B,
2887 -(__v16sf) __C,
2888 (__mmask16) -1, __R);
2889}
2890
2891extern __inline __m512
2892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2894 __m512 __C, const int __R)
2895{
2896 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2897 (__v16sf) __B,
2898 -(__v16sf) __C,
2899 (__mmask16) __U, __R);
2900}
2901
2902extern __inline __m512
2903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2905 __mmask16 __U, const int __R)
2906{
2907 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2908 (__v16sf) __B,
2909 (__v16sf) __C,
2910 (__mmask16) __U, __R);
2911}
2912
2913extern __inline __m512
2914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2915_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2916 __m512 __C, const int __R)
2917{
2918 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2919 (__v16sf) __B,
2920 -(__v16sf) __C,
2921 (__mmask16) __U, __R);
2922}
2923
2924extern __inline __m512d
2925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2927{
2928 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2929 (__v8df) __B,
2930 (__v8df) __C,
2931 (__mmask8) -1, __R);
2932}
2933
2934extern __inline __m512d
2935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2936_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2937 __m512d __C, const int __R)
2938{
2939 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2940 (__v8df) __B,
2941 (__v8df) __C,
2942 (__mmask8) __U, __R);
2943}
2944
2945extern __inline __m512d
2946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2947_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2948 __mmask8 __U, const int __R)
2949{
2950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2951 (__v8df) __B,
2952 (__v8df) __C,
2953 (__mmask8) __U, __R);
2954}
2955
2956extern __inline __m512d
2957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2958_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2959 __m512d __C, const int __R)
2960{
2961 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2962 (__v8df) __B,
2963 (__v8df) __C,
2964 (__mmask8) __U, __R);
2965}
2966
2967extern __inline __m512
2968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2969_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2970{
2971 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2972 (__v16sf) __B,
2973 (__v16sf) __C,
2974 (__mmask16) -1, __R);
2975}
2976
2977extern __inline __m512
2978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2979_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2980 __m512 __C, const int __R)
2981{
2982 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2983 (__v16sf) __B,
2984 (__v16sf) __C,
2985 (__mmask16) __U, __R);
2986}
2987
2988extern __inline __m512
2989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2990_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2991 __mmask16 __U, const int __R)
2992{
2993 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2994 (__v16sf) __B,
2995 (__v16sf) __C,
2996 (__mmask16) __U, __R);
2997}
2998
2999extern __inline __m512
3000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3001_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3002 __m512 __C, const int __R)
3003{
3004 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3005 (__v16sf) __B,
3006 (__v16sf) __C,
3007 (__mmask16) __U, __R);
3008}
3009
3010extern __inline __m512d
3011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3012_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3013{
3014 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3015 (__v8df) __B,
3016 -(__v8df) __C,
3017 (__mmask8) -1, __R);
3018}
3019
3020extern __inline __m512d
3021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3022_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3023 __m512d __C, const int __R)
3024{
3025 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3026 (__v8df) __B,
3027 -(__v8df) __C,
3028 (__mmask8) __U, __R);
3029}
3030
3031extern __inline __m512d
3032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3034 __mmask8 __U, const int __R)
3035{
3036 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3037 (__v8df) __B,
3038 (__v8df) __C,
3039 (__mmask8) __U, __R);
3040}
3041
3042extern __inline __m512d
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3045 __m512d __C, const int __R)
3046{
3047 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3048 (__v8df) __B,
3049 -(__v8df) __C,
3050 (__mmask8) __U, __R);
3051}
3052
3053extern __inline __m512
3054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3055_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3056{
3057 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3058 (__v16sf) __B,
3059 -(__v16sf) __C,
3060 (__mmask16) -1, __R);
3061}
3062
3063extern __inline __m512
3064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3065_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3066 __m512 __C, const int __R)
3067{
3068 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3069 (__v16sf) __B,
3070 -(__v16sf) __C,
3071 (__mmask16) __U, __R);
3072}
3073
3074extern __inline __m512
3075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3076_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3077 __mmask16 __U, const int __R)
3078{
3079 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3080 (__v16sf) __B,
3081 (__v16sf) __C,
3082 (__mmask16) __U, __R);
3083}
3084
3085extern __inline __m512
3086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3087_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3088 __m512 __C, const int __R)
3089{
3090 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3091 (__v16sf) __B,
3092 -(__v16sf) __C,
3093 (__mmask16) __U, __R);
3094}
3095
3096extern __inline __m512d
3097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3098_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3099{
3100 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3101 (__v8df) __B,
3102 (__v8df) __C,
3103 (__mmask8) -1, __R);
3104}
3105
3106extern __inline __m512d
3107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3108_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3109 __m512d __C, const int __R)
3110{
3111 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3112 (__v8df) __B,
3113 (__v8df) __C,
3114 (__mmask8) __U, __R);
3115}
3116
3117extern __inline __m512d
3118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3120 __mmask8 __U, const int __R)
3121{
3122 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3123 (__v8df) __B,
3124 (__v8df) __C,
3125 (__mmask8) __U, __R);
3126}
3127
3128extern __inline __m512d
3129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3131 __m512d __C, const int __R)
3132{
3133 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3134 (__v8df) __B,
3135 (__v8df) __C,
3136 (__mmask8) __U, __R);
3137}
3138
3139extern __inline __m512
3140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3142{
3143 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf) __C,
3146 (__mmask16) -1, __R);
3147}
3148
3149extern __inline __m512
3150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3151_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3152 __m512 __C, const int __R)
3153{
3154 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3155 (__v16sf) __B,
3156 (__v16sf) __C,
3157 (__mmask16) __U, __R);
3158}
3159
3160extern __inline __m512
3161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3162_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3163 __mmask16 __U, const int __R)
3164{
3165 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3166 (__v16sf) __B,
3167 (__v16sf) __C,
3168 (__mmask16) __U, __R);
3169}
3170
3171extern __inline __m512
3172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3173_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3174 __m512 __C, const int __R)
3175{
3176 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3177 (__v16sf) __B,
3178 (__v16sf) __C,
3179 (__mmask16) __U, __R);
3180}
3181
3182extern __inline __m512d
3183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3184_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3185{
3186 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3187 (__v8df) __B,
3188 -(__v8df) __C,
3189 (__mmask8) -1, __R);
3190}
3191
3192extern __inline __m512d
3193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3195 __m512d __C, const int __R)
3196{
3197 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3198 (__v8df) __B,
3199 (__v8df) __C,
3200 (__mmask8) __U, __R);
3201}
3202
3203extern __inline __m512d
3204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3205_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3206 __mmask8 __U, const int __R)
3207{
3208 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3209 (__v8df) __B,
3210 (__v8df) __C,
3211 (__mmask8) __U, __R);
3212}
3213
3214extern __inline __m512d
3215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3216_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3217 __m512d __C, const int __R)
3218{
3219 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3220 (__v8df) __B,
3221 -(__v8df) __C,
3222 (__mmask8) __U, __R);
3223}
3224
3225extern __inline __m512
3226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3227_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3228{
3229 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3230 (__v16sf) __B,
3231 -(__v16sf) __C,
3232 (__mmask16) -1, __R);
3233}
3234
3235extern __inline __m512
3236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3237_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3238 __m512 __C, const int __R)
3239{
3240 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3241 (__v16sf) __B,
3242 (__v16sf) __C,
3243 (__mmask16) __U, __R);
3244}
3245
3246extern __inline __m512
3247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3248_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3249 __mmask16 __U, const int __R)
3250{
3251 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3252 (__v16sf) __B,
3253 (__v16sf) __C,
3254 (__mmask16) __U, __R);
3255}
3256
3257extern __inline __m512
3258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3259_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3260 __m512 __C, const int __R)
3261{
3262 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3263 (__v16sf) __B,
3264 -(__v16sf) __C,
3265 (__mmask16) __U, __R);
3266}
3267#else
/* Macro forms of _mm512_*fmadd_round_pd.  Each expansion is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the cast value rather than to the builtin call.  */
#define _mm512_fmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R))
3279
/* Macro forms of _mm512_*fmadd_round_ps.  Each expansion is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the cast value rather than to the builtin call.  */
#define _mm512_fmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R))
3291
/* Macro forms of _mm512_*fmsub_round_pd.  The plain, mask and maskz
   forms negate C and use the vfmaddpd512 builtins; mask3 uses the
   vfmsubpd512_mask3 builtin.  Each expansion is wrapped in parentheses so
   that a postfix operator applied to the macro result binds to the cast
   value rather than to the builtin call.  */
#define _mm512_fmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R))
3303
/* Macro forms of _mm512_*fmsub_round_ps.  The plain, mask and maskz
   forms negate C and use the vfmaddps512 builtins; mask3 uses the
   vfmsubps512_mask3 builtin.  Each expansion is wrapped in parentheses so
   that a postfix operator applied to the macro result binds to the cast
   value rather than to the builtin call.  */
#define _mm512_fmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsps512_maskz_placeholder_do_not_use)
3315
/* Macro forms of _mm512_*fmaddsub_round_pd.  Every form expands to the
   vfmaddsubpd512 builtin with the matching suffix.  Each expansion is
   wrapped in parentheses so that a postfix operator applied to the macro
   result binds to the cast value rather than to the builtin call.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R))

/* Must use the vfmaddsub builtin (not plain vfmadd) so the macro agrees
   with the inline function of the same name.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R))
3327
/* Macro forms of _mm512_*fmaddsub_round_ps.  Every form expands to the
   vfmaddsubps512 builtin with the matching suffix.  Each expansion is
   wrapped in parentheses so that a postfix operator applied to the macro
   result binds to the cast value rather than to the builtin call.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R))
3339
/* Macro forms of _mm512_*fmsubadd_round_pd.  The plain, mask and maskz
   forms negate C and use the vfmaddsubpd512 builtins; mask3 uses the
   vfmsubaddpd512_mask3 builtin.  Each expansion is wrapped in parentheses
   so that a postfix operator applied to the macro result binds to the
   cast value rather than to the builtin call.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R))
3351
/* Macro forms of _mm512_*fmsubadd_round_ps.  The plain, mask and maskz
   forms negate C and use the vfmaddsubps512 builtins; mask3 uses the
   vfmsubaddps512_mask3 builtin.  Each expansion is wrapped in parentheses
   so that a postfix operator applied to the macro result binds to the
   cast value rather than to the builtin call.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R))
3363
/* Macro forms of _mm512_*fnmadd_round_pd.  The plain, mask3 and maskz
   forms negate A and use the vfmaddpd512 builtins; the mask form uses the
   vfnmaddpd512_mask builtin.  Each expansion is wrapped in parentheses so
   that a postfix operator applied to the macro result binds to the cast
   value rather than to the builtin call.  */
#define _mm512_fnmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfnmaddpd512_mask_unmasked_placeholder)

/* A is passed unnegated: the builtin is already the vfnmadd one, and the
   inline function of the same name passes __A unchanged.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R))
3375
/* Macro forms of _mm512_*fnmadd_round_ps.  The plain, mask3 and maskz
   forms negate A and use the vfmaddps512 builtins; the mask form uses the
   vfnmaddps512_mask builtin.  Each expansion is wrapped in parentheses so
   that a postfix operator applied to the macro result binds to the cast
   value rather than to the builtin call.  */
#define _mm512_fnmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R))

/* A is passed unnegated: the builtin is already the vfnmadd one, and the
   inline function of the same name passes __A unchanged.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R))
3387
/* Macro forms of _mm512_*fnmsub_round_pd.  The plain and maskz forms
   negate both A and C and use the vfmaddpd512 builtins; the mask and
   mask3 forms use the vfnmsubpd512 builtins.  Each expansion is wrapped
   in parentheses so that a postfix operator applied to the macro result
   binds to the cast value rather than to the builtin call.  */
#define _mm512_fnmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R))
3399
/* Macro forms of _mm512_*fnmsub_round_ps.  The plain and maskz forms
   negate both A and C and use the vfmaddps512 builtins; the mask and
   mask3 forms use the vfnmsubps512 builtins.  Each expansion is wrapped
   in parentheses so that a postfix operator applied to the macro result
   binds to the cast value rather than to the builtin call.  */
#define _mm512_fnmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R))
3411#endif
3412
3413extern __inline __m512i
3414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415_mm512_abs_epi64 (__m512i __A)
3416{
3417 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3418 (__v8di)
4271e5cb 3419 _mm512_undefined_epi32 (),
756c5857
AI
3420 (__mmask8) -1);
3421}
3422
3423extern __inline __m512i
3424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3426{
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di) __W,
3429 (__mmask8) __U);
3430}
3431
3432extern __inline __m512i
3433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3434_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3435{
3436 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3437 (__v8di)
3438 _mm512_setzero_si512 (),
3439 (__mmask8) __U);
3440}
3441
3442extern __inline __m512i
3443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3444_mm512_abs_epi32 (__m512i __A)
3445{
3446 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3447 (__v16si)
4271e5cb 3448 _mm512_undefined_epi32 (),
756c5857
AI
3449 (__mmask16) -1);
3450}
3451
3452extern __inline __m512i
3453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3455{
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si) __W,
3458 (__mmask16) __U);
3459}
3460
3461extern __inline __m512i
3462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3464{
3465 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3466 (__v16si)
3467 _mm512_setzero_si512 (),
3468 (__mmask16) __U);
3469}
3470
3471extern __inline __m512
3472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3473_mm512_broadcastss_ps (__m128 __A)
3474{
0b192937
UD
3475 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3476 (__v16sf)
3477 _mm512_undefined_ps (),
756c5857
AI
3478 (__mmask16) -1);
3479}
3480
3481extern __inline __m512
3482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3483_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3484{
3485 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3486 (__v16sf) __O, __M);
3487}
3488
3489extern __inline __m512
3490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3491_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3492{
3493 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3494 (__v16sf)
3495 _mm512_setzero_ps (),
3496 __M);
3497}
3498
3499extern __inline __m512d
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm512_broadcastsd_pd (__m128d __A)
3502{
0b192937
UD
3503 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3504 (__v8df)
3505 _mm512_undefined_pd (),
756c5857
AI
3506 (__mmask8) -1);
3507}
3508
3509extern __inline __m512d
3510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3511_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3512{
3513 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3514 (__v8df) __O, __M);
3515}
3516
3517extern __inline __m512d
3518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3519_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3520{
3521 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3522 (__v8df)
3523 _mm512_setzero_pd (),
3524 __M);
3525}
3526
3527extern __inline __m512i
3528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529_mm512_broadcastd_epi32 (__m128i __A)
3530{
0b192937
UD
3531 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3532 (__v16si)
4271e5cb 3533 _mm512_undefined_epi32 (),
756c5857
AI
3534 (__mmask16) -1);
3535}
3536
3537extern __inline __m512i
3538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3539_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3540{
3541 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3542 (__v16si) __O, __M);
3543}
3544
3545extern __inline __m512i
3546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3547_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3548{
3549 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3550 (__v16si)
3551 _mm512_setzero_si512 (),
3552 __M);
3553}
3554
3555extern __inline __m512i
3556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557_mm512_set1_epi32 (int __A)
3558{
0b192937
UD
3559 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3560 (__v16si)
4271e5cb 3561 _mm512_undefined_epi32 (),
756c5857
AI
3562 (__mmask16)(-1));
3563}
3564
3565extern __inline __m512i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3568{
3569 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3570 __M);
3571}
3572
3573extern __inline __m512i
3574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3576{
3577 return (__m512i)
3578 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3579 (__v16si) _mm512_setzero_si512 (),
3580 __M);
3581}
3582
3583extern __inline __m512i
3584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585_mm512_broadcastq_epi64 (__m128i __A)
3586{
0b192937
UD
3587 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3588 (__v8di)
4271e5cb 3589 _mm512_undefined_epi32 (),
756c5857
AI
3590 (__mmask8) -1);
3591}
3592
3593extern __inline __m512i
3594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3596{
3597 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3598 (__v8di) __O, __M);
3599}
3600
3601extern __inline __m512i
3602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3604{
3605 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3606 (__v8di)
3607 _mm512_setzero_si512 (),
3608 __M);
3609}
3610
3611extern __inline __m512i
3612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3613_mm512_set1_epi64 (long long __A)
3614{
0b192937
UD
3615 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3616 (__v8di)
4271e5cb 3617 _mm512_undefined_epi32 (),
756c5857 3618 (__mmask8)(-1));
756c5857
AI
3619}
3620
3621extern __inline __m512i
3622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3623_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3624{
756c5857
AI
3625 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3626 __M);
756c5857
AI
3627}
3628
3629extern __inline __m512i
3630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3631_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3632{
756c5857
AI
3633 return (__m512i)
3634 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3635 (__v8di) _mm512_setzero_si512 (),
3636 __M);
756c5857
AI
3637}
3638
3639extern __inline __m512
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm512_broadcast_f32x4 (__m128 __A)
3642{
0b192937
UD
3643 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3644 (__v16sf)
3645 _mm512_undefined_ps (),
756c5857
AI
3646 (__mmask16) -1);
3647}
3648
3649extern __inline __m512
3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3652{
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf) __O,
3655 __M);
3656}
3657
3658extern __inline __m512
3659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3660_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3661{
3662 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3663 (__v16sf)
3664 _mm512_setzero_ps (),
3665 __M);
3666}
3667
3668extern __inline __m512i
3669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670_mm512_broadcast_i32x4 (__m128i __A)
3671{
756c5857 3672 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 3673 (__v16si)
4271e5cb 3674 _mm512_undefined_epi32 (),
756c5857
AI
3675 (__mmask16) -1);
3676}
3677
3678extern __inline __m512i
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3681{
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si) __O,
3684 __M);
3685}
3686
3687extern __inline __m512i
3688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3690{
3691 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3692 (__v16si)
3693 _mm512_setzero_si512 (),
3694 __M);
3695}
3696
3697extern __inline __m512d
3698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699_mm512_broadcast_f64x4 (__m256d __A)
3700{
756c5857 3701 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
3702 (__v8df)
3703 _mm512_undefined_pd (),
756c5857
AI
3704 (__mmask8) -1);
3705}
3706
3707extern __inline __m512d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3710{
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df) __O,
3713 __M);
3714}
3715
3716extern __inline __m512d
3717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3719{
3720 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3721 (__v8df)
3722 _mm512_setzero_pd (),
3723 __M);
3724}
3725
3726extern __inline __m512i
3727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728_mm512_broadcast_i64x4 (__m256i __A)
3729{
756c5857 3730 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 3731 (__v8di)
4271e5cb 3732 _mm512_undefined_epi32 (),
756c5857
AI
3733 (__mmask8) -1);
3734}
3735
3736extern __inline __m512i
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3739{
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di) __O,
3742 __M);
3743}
3744
3745extern __inline __m512i
3746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3747_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3748{
3749 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3750 (__v8di)
3751 _mm512_setzero_si512 (),
3752 __M);
3753}
3754
3755typedef enum
3756{
3757 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3758 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3759 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3760 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3761 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3762 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3763 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3764 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3765 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3766 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3767 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3768 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3769 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3770 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3771 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3772 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3773 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3774 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3775 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3776 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3777 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3778 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3779 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3780 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3781 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3782 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3783 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3784 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3785 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3786 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3787 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3788 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3789 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3790 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3791 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3792 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3793 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3794 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3795 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3796 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3797 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3798 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3799 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3800 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3801 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3802 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3803 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3804 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3805 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3806 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3807 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3808 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3809 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3810 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3811 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3812 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3813 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3814 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3815 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3816 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3817 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3818 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3819 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3820 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3821 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3822 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3823 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3824 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3825 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3826 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3827 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3828 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3829 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3830 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3831 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3832 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3833 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3834 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3835 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3836 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3837 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3838 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3839 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3840 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3841 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3842 _MM_PERM_DDDD = 0xFF
3843} _MM_PERM_ENUM;
3844
3845#ifdef __OPTIMIZE__
3846extern __inline __m512i
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3849{
3850 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3851 __mask,
3852 (__v16si)
4271e5cb 3853 _mm512_undefined_epi32 (),
756c5857
AI
3854 (__mmask16) -1);
3855}
3856
3857extern __inline __m512i
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3860 _MM_PERM_ENUM __mask)
3861{
3862 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3863 __mask,
3864 (__v16si) __W,
3865 (__mmask16) __U);
3866}
3867
3868extern __inline __m512i
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3871{
3872 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3873 __mask,
3874 (__v16si)
3875 _mm512_setzero_si512 (),
3876 (__mmask16) __U);
3877}
3878
3879extern __inline __m512i
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3882{
3883 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3884 (__v8di) __B, __imm,
3885 (__v8di)
4271e5cb 3886 _mm512_undefined_epi32 (),
756c5857
AI
3887 (__mmask8) -1);
3888}
3889
3890extern __inline __m512i
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3893 __m512i __B, const int __imm)
3894{
3895 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3896 (__v8di) __B, __imm,
3897 (__v8di) __W,
3898 (__mmask8) __U);
3899}
3900
3901extern __inline __m512i
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3904 const int __imm)
3905{
3906 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3907 (__v8di) __B, __imm,
3908 (__v8di)
3909 _mm512_setzero_si512 (),
3910 (__mmask8) __U);
3911}
3912
3913extern __inline __m512i
3914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3916{
3917 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3918 (__v16si) __B,
3919 __imm,
3920 (__v16si)
4271e5cb 3921 _mm512_undefined_epi32 (),
756c5857
AI
3922 (__mmask16) -1);
3923}
3924
3925extern __inline __m512i
3926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3927_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3928 __m512i __B, const int __imm)
3929{
3930 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3931 (__v16si) __B,
3932 __imm,
3933 (__v16si) __W,
3934 (__mmask16) __U);
3935}
3936
3937extern __inline __m512i
3938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3939_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3940 const int __imm)
3941{
3942 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3943 (__v16si) __B,
3944 __imm,
3945 (__v16si)
3946 _mm512_setzero_si512 (),
3947 (__mmask16) __U);
3948}
3949
3950extern __inline __m512d
3951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3952_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3953{
3954 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3955 (__v8df) __B, __imm,
3956 (__v8df)
0b192937 3957 _mm512_undefined_pd (),
756c5857
AI
3958 (__mmask8) -1);
3959}
3960
3961extern __inline __m512d
3962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3963_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3964 __m512d __B, const int __imm)
3965{
3966 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3967 (__v8df) __B, __imm,
3968 (__v8df) __W,
3969 (__mmask8) __U);
3970}
3971
3972extern __inline __m512d
3973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3975 const int __imm)
3976{
3977 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3978 (__v8df) __B, __imm,
3979 (__v8df)
3980 _mm512_setzero_pd (),
3981 (__mmask8) __U);
3982}
3983
3984extern __inline __m512
3985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3987{
3988 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3989 (__v16sf) __B, __imm,
3990 (__v16sf)
0b192937 3991 _mm512_undefined_ps (),
756c5857
AI
3992 (__mmask16) -1);
3993}
3994
3995extern __inline __m512
3996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3998 __m512 __B, const int __imm)
3999{
4000 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4001 (__v16sf) __B, __imm,
4002 (__v16sf) __W,
4003 (__mmask16) __U);
4004}
4005
4006extern __inline __m512
4007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4008_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4009 const int __imm)
4010{
4011 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4012 (__v16sf) __B, __imm,
4013 (__v16sf)
4014 _mm512_setzero_ps (),
4015 (__mmask16) __U);
4016}
4017
4018#else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_pshufd512_mask (undefined third operand, mask -1).  */
#define _mm512_shuffle_epi32(V, IMM)                                    \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_undefined_epi32 (),                     \
     (__mmask16) -1))
4023
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_pshufd512_mask with SRC as third operand and K as mask.  */
#define _mm512_mask_shuffle_epi32(SRC, K, V, IMM)                       \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) (SRC),                                         \
     (__mmask16) (K)))
4028
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_pshufd512_mask (zero vector as third operand).  */
#define _mm512_maskz_shuffle_epi32(K, V, IMM)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) (K)))
4033
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_shuf_i64x2_mask (undefined fourth operand, mask -1).  */
#define _mm512_shuffle_i64x2(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) _mm512_undefined_epi32 (),                      \
     (__mmask8) -1))
4039
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_shuf_i64x2_mask with SRC as fourth operand, K as mask.  */
#define _mm512_mask_shuffle_i64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) (SRC),                                          \
     (__mmask8) (K)))
4045
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_shuf_i64x2_mask (zero vector as fourth operand).  */
#define _mm512_maskz_shuffle_i64x2(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) _mm512_setzero_si512 (),                        \
     (__mmask8) (K)))
4051
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_shuf_i32x4_mask (undefined fourth operand, mask -1).  */
#define _mm512_shuffle_i32x4(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_undefined_epi32 (),                     \
     (__mmask16) -1))
4057
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_shuf_i32x4_mask with SRC as fourth operand, K as mask.  */
#define _mm512_mask_shuffle_i32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) (SRC),                                         \
     (__mmask16) (K)))
4063
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_shuf_i32x4_mask (zero vector as fourth operand).  */
#define _mm512_maskz_shuffle_i32x4(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) (K)))
4069
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_shuf_f64x2_mask (undefined fourth operand, mask -1).  */
#define _mm512_shuffle_f64x2(V1, V2, IMM)                               \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) _mm512_undefined_pd (),                         \
     (__mmask8) -1))
4075
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_shuf_f64x2_mask with SRC as fourth operand, K as mask.  */
#define _mm512_mask_shuffle_f64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) (SRC),                                          \
     (__mmask8) (K)))
4081
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_shuf_f64x2_mask (zero vector as fourth operand).  */
#define _mm512_maskz_shuffle_f64x2(K, V1, V2, IMM)                      \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) _mm512_setzero_pd (),                           \
     (__mmask8) (K)))
4087
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_shuf_f32x4_mask (undefined fourth operand, mask -1).  */
#define _mm512_shuffle_f32x4(V1, V2, IMM)                               \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) _mm512_undefined_ps (),                         \
     (__mmask16) -1))
4093
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_shuf_f32x4_mask with SRC as fourth operand, K as mask.  */
#define _mm512_mask_shuffle_f32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) (SRC),                                          \
     (__mmask16) (K)))
4099
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_shuf_f32x4_mask (zero vector as fourth operand).  */
#define _mm512_maskz_shuffle_f32x4(K, V1, V2, IMM)                      \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) _mm512_setzero_ps (),                           \
     (__mmask16) (K)))
4105#endif
4106
4107extern __inline __m512i
4108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4109_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4110{
4111 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4112 (__v16si) __B,
4113 (__v16si)
4271e5cb 4114 _mm512_undefined_epi32 (),
756c5857
AI
4115 (__mmask16) -1);
4116}
4117
4118extern __inline __m512i
4119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4120_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4121{
4122 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4123 (__v16si) __B,
4124 (__v16si) __W,
4125 (__mmask16) __U);
4126}
4127
4128extern __inline __m512i
4129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4131{
4132 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4133 (__v16si) __B,
4134 (__v16si)
4135 _mm512_setzero_si512 (),
4136 (__mmask16) __U);
4137}
4138
4139extern __inline __m512i
4140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4141_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4142{
4143 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4144 (__v16si) __B,
4145 (__v16si)
4271e5cb 4146 _mm512_undefined_epi32 (),
756c5857
AI
4147 (__mmask16) -1);
4148}
4149
4150extern __inline __m512i
4151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4152_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4153{
4154 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4155 (__v16si) __B,
4156 (__v16si) __W,
4157 (__mmask16) __U);
4158}
4159
4160extern __inline __m512i
4161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4163{
4164 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4165 (__v16si) __B,
4166 (__v16si)
4167 _mm512_setzero_si512 (),
4168 (__mmask16) __U);
4169}
4170
4171extern __inline __m512i
4172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4174{
4175 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4176 (__v8di) __B,
4177 (__v8di)
4271e5cb 4178 _mm512_undefined_epi32 (),
756c5857
AI
4179 (__mmask8) -1);
4180}
4181
4182extern __inline __m512i
4183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4185{
4186 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4187 (__v8di) __B,
4188 (__v8di) __W,
4189 (__mmask8) __U);
4190}
4191
4192extern __inline __m512i
4193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4194_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4195{
4196 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4197 (__v8di) __B,
4198 (__v8di)
4199 _mm512_setzero_si512 (),
4200 (__mmask8) __U);
4201}
4202
4203extern __inline __m512i
4204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4205_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4206{
4207 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4208 (__v8di) __B,
4209 (__v8di)
4271e5cb 4210 _mm512_undefined_epi32 (),
756c5857
AI
4211 (__mmask8) -1);
4212}
4213
4214extern __inline __m512i
4215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4216_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4217{
4218 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4219 (__v8di) __B,
4220 (__v8di) __W,
4221 (__mmask8) __U);
4222}
4223
4224extern __inline __m512i
4225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4226_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4227{
4228 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4229 (__v8di) __B,
4230 (__v8di)
4231 _mm512_setzero_si512 (),
4232 (__mmask8) __U);
4233}
4234
4235#ifdef __OPTIMIZE__
4236extern __inline __m256i
4237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4238_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4239{
4240 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4241 (__v8si)
0b192937 4242 _mm256_undefined_si256 (),
756c5857
AI
4243 (__mmask8) -1, __R);
4244}
4245
4246extern __inline __m256i
4247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4248_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4249 const int __R)
4250{
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si) __W,
4253 (__mmask8) __U, __R);
4254}
4255
4256extern __inline __m256i
4257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4258_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4259{
4260 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4261 (__v8si)
4262 _mm256_setzero_si256 (),
4263 (__mmask8) __U, __R);
4264}
4265
4266extern __inline __m256i
4267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4269{
4270 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4271 (__v8si)
0b192937 4272 _mm256_undefined_si256 (),
756c5857
AI
4273 (__mmask8) -1, __R);
4274}
4275
4276extern __inline __m256i
4277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4278_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4279 const int __R)
4280{
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si) __W,
4283 (__mmask8) __U, __R);
4284}
4285
4286extern __inline __m256i
4287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4288_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4289{
4290 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4291 (__v8si)
4292 _mm256_setzero_si256 (),
4293 (__mmask8) __U, __R);
4294}
4295#else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvttpd2dq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvtt_roundpd_epi32(V, R)                                 \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (                         \
     V, (__v8si) _mm256_undefined_si256 (), -1, R))
756c5857
AI
4298
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvttpd2dq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvtt_roundpd_epi32(SRC, K, V, R)                    \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (V, (__v8si) (SRC), K, R))
4301
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvttpd2dq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvtt_roundpd_epi32(K, V, R)                        \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4304
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvttpd2udq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvtt_roundpd_epu32(V, R)                                 \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (                        \
     V, (__v8si) _mm256_undefined_si256 (), -1, R))
756c5857
AI
4307
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvttpd2udq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvtt_roundpd_epu32(SRC, K, V, R)                    \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (V, (__v8si) (SRC), K, R))
4310
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvttpd2udq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvtt_roundpd_epu32(K, V, R)                        \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (                        \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4313#endif
4314
4315#ifdef __OPTIMIZE__
4316extern __inline __m256i
4317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4318_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4319{
4320 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4321 (__v8si)
0b192937 4322 _mm256_undefined_si256 (),
756c5857
AI
4323 (__mmask8) -1, __R);
4324}
4325
4326extern __inline __m256i
4327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4329 const int __R)
4330{
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si) __W,
4333 (__mmask8) __U, __R);
4334}
4335
4336extern __inline __m256i
4337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4339{
4340 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4341 (__v8si)
4342 _mm256_setzero_si256 (),
4343 (__mmask8) __U, __R);
4344}
4345
4346extern __inline __m256i
4347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4349{
4350 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4351 (__v8si)
0b192937 4352 _mm256_undefined_si256 (),
756c5857
AI
4353 (__mmask8) -1, __R);
4354}
4355
4356extern __inline __m256i
4357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4358_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4359 const int __R)
4360{
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si) __W,
4363 (__mmask8) __U, __R);
4364}
4365
4366extern __inline __m256i
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4369{
4370 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4371 (__v8si)
4372 _mm256_setzero_si256 (),
4373 (__mmask8) __U, __R);
4374}
4375#else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvtpd2dq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvt_roundpd_epi32(V, R)                                  \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (                          \
     V, (__v8si) _mm256_undefined_si256 (), -1, R))
756c5857
AI
4378
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvtpd2dq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvt_roundpd_epi32(SRC, K, V, R)                     \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (V, (__v8si) (SRC), K, R))
4381
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvtpd2dq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvt_roundpd_epi32(K, V, R)                         \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (                          \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4384
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvtpd2udq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvt_roundpd_epu32(V, R)                                  \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (                         \
     V, (__v8si) _mm256_undefined_si256 (), -1, R))
756c5857
AI
4387
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvtpd2udq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvt_roundpd_epu32(SRC, K, V, R)                     \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (V, (__v8si) (SRC), K, R))
4390
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvtpd2udq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvt_roundpd_epu32(K, V, R)                         \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4393#endif
4394
4395#ifdef __OPTIMIZE__
4396extern __inline __m512i
4397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4398_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4399{
4400 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4401 (__v16si)
4271e5cb 4402 _mm512_undefined_epi32 (),
756c5857
AI
4403 (__mmask16) -1, __R);
4404}
4405
4406extern __inline __m512i
4407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4409 const int __R)
4410{
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si) __W,
4413 (__mmask16) __U, __R);
4414}
4415
4416extern __inline __m512i
4417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4419{
4420 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4421 (__v16si)
4422 _mm512_setzero_si512 (),
4423 (__mmask16) __U, __R);
4424}
4425
4426extern __inline __m512i
4427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4429{
4430 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4431 (__v16si)
4271e5cb 4432 _mm512_undefined_epi32 (),
756c5857
AI
4433 (__mmask16) -1, __R);
4434}
4435
4436extern __inline __m512i
4437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4439 const int __R)
4440{
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si) __W,
4443 (__mmask16) __U, __R);
4444}
4445
4446extern __inline __m512i
4447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4448_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4449{
4450 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4451 (__v16si)
4452 _mm512_setzero_si512 (),
4453 (__mmask16) __U, __R);
4454}
4455#else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvttps2dq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvtt_roundps_epi32(V, R)                                 \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (                         \
     V, (__v16si) _mm512_undefined_epi32 (), -1, R))
756c5857
AI
4458
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvttps2dq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvtt_roundps_epi32(SRC, K, V, R)                    \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (V, (__v16si) (SRC), K, R))
4461
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvttps2dq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvtt_roundps_epi32(K, V, R)                        \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4464
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvttps2udq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvtt_roundps_epu32(V, R)                                 \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (                        \
     V, (__v16si) _mm512_undefined_epi32 (), -1, R))
756c5857
AI
4467
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvttps2udq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvtt_roundps_epu32(SRC, K, V, R)                    \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (V, (__v16si) (SRC), K, R))
4470
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvttps2udq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvtt_roundps_epu32(K, V, R)                        \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (                        \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4473#endif
4474
4475#ifdef __OPTIMIZE__
4476extern __inline __m512i
4477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4478_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4479{
4480 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4481 (__v16si)
4271e5cb 4482 _mm512_undefined_epi32 (),
756c5857
AI
4483 (__mmask16) -1, __R);
4484}
4485
4486extern __inline __m512i
4487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4489 const int __R)
4490{
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si) __W,
4493 (__mmask16) __U, __R);
4494}
4495
4496extern __inline __m512i
4497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4498_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4499{
4500 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4501 (__v16si)
4502 _mm512_setzero_si512 (),
4503 (__mmask16) __U, __R);
4504}
4505
4506extern __inline __m512i
4507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4508_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4509{
4510 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4511 (__v16si)
4271e5cb 4512 _mm512_undefined_epi32 (),
756c5857
AI
4513 (__mmask16) -1, __R);
4514}
4515
4516extern __inline __m512i
4517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4518_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4519 const int __R)
4520{
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si) __W,
4523 (__mmask16) __U, __R);
4524}
4525
4526extern __inline __m512i
4527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4529{
4530 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4531 (__v16si)
4532 _mm512_setzero_si512 (),
4533 (__mmask16) __U, __R);
4534}
4535#else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvtps2dq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvt_roundps_epi32(V, R)                                  \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (                          \
     V, (__v16si) _mm512_undefined_epi32 (), -1, R))
756c5857
AI
4538
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvtps2dq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvt_roundps_epi32(SRC, K, V, R)                     \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (V, (__v16si) (SRC), K, R))
4541
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvtps2dq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvt_roundps_epi32(K, V, R)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (                          \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4544
/* Macro form used when __OPTIMIZE__ is not defined: unmasked call to
   __builtin_ia32_cvtps2udq512_mask (undefined second operand, mask -1).  */
#define _mm512_cvt_roundps_epu32(V, R)                                  \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (                         \
     V, (__v16si) _mm512_undefined_epi32 (), -1, R))
756c5857
AI
4547
/* Macro form used when __OPTIMIZE__ is not defined: masked call to
   __builtin_ia32_cvtps2udq512_mask with SRC as second operand, K as mask.  */
#define _mm512_mask_cvt_roundps_epu32(SRC, K, V, R)                     \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (V, (__v16si) (SRC), K, R))
4550
/* Macro form used when __OPTIMIZE__ is not defined: zero-masked call to
   __builtin_ia32_cvtps2udq512_mask (zero vector as second operand).  */
#define _mm512_maskz_cvt_roundps_epu32(K, V, R)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4553#endif
4554
4555extern __inline __m128d
4556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557_mm_cvtu32_sd (__m128d __A, unsigned __B)
4558{
4559 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4560}
4561
4562#ifdef __x86_64__
4563#ifdef __OPTIMIZE__
4564extern __inline __m128d
4565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4566_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4567{
4568 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4569}
4570
4571extern __inline __m128d
4572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4574{
4575 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4576}
4577
4578extern __inline __m128d
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4581{
4582 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4583}
4584#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v2df as in the inline variant.  */
#define _mm_cvt_roundu64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) (__m128d) (A), (B), (C)))
4587
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v2df as in the inline variant.  */
#define _mm_cvt_roundi64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__m128d) (A), (B), (C)))
4590
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v2df as in the inline variant.  */
#define _mm_cvt_roundsi64_sd(A, B, C)                                   \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (__m128d) (A), (B), (C)))
4593#endif
4594
4595#endif
4596
4597#ifdef __OPTIMIZE__
4598extern __inline __m128
4599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4601{
4602 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4603}
4604
4605extern __inline __m128
4606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4608{
4609 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4610}
4611
4612extern __inline __m128
4613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4614_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4615{
4616 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4617}
4618#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundu32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) (__m128) (A), (B), (C)))
4621
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundi32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__m128) (A), (B), (C)))
4624
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundsi32_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (__m128) (A), (B), (C)))
4627#endif
4628
4629#ifdef __x86_64__
4630#ifdef __OPTIMIZE__
4631extern __inline __m128
4632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4634{
4635 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4636}
4637
4638extern __inline __m128
4639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4640_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4641{
4642 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4643}
4644
4645extern __inline __m128
4646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4648{
4649 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4650}
4651#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundu64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) (__m128) (A), (B), (C)))
4654
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundi64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__m128) (A), (B), (C)))
4657
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so that postfix operators or sizeof applied to
   the macro result bind to the cast expression, not to the builtin call;
   the vector argument is cast to __v4sf as in the inline variant.  */
#define _mm_cvt_roundsi64_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (__m128) (A), (B), (C)))
4660#endif
4661
4662#endif
4663
4664extern __inline __m128i
4665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4666_mm512_cvtepi32_epi8 (__m512i __A)
4667{
0b192937
UD
4668 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4669 (__v16qi)
4670 _mm_undefined_si128 (),
756c5857
AI
4671 (__mmask16) -1);
4672}
4673
d256b866
IT
4674extern __inline void
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4677{
4678 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4679}
4680
756c5857
AI
4681extern __inline __m128i
4682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4683_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4684{
4685 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4686 (__v16qi) __O, __M);
4687}
4688
4689extern __inline __m128i
4690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4692{
4693 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4694 (__v16qi)
4695 _mm_setzero_si128 (),
4696 __M);
4697}
4698
4699extern __inline __m128i
4700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701_mm512_cvtsepi32_epi8 (__m512i __A)
4702{
0b192937
UD
4703 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4704 (__v16qi)
4705 _mm_undefined_si128 (),
756c5857
AI
4706 (__mmask16) -1);
4707}
4708
d256b866
IT
4709extern __inline void
4710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4712{
4713 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4714}
4715
756c5857
AI
4716extern __inline __m128i
4717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4718_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4719{
4720 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4721 (__v16qi) __O, __M);
4722}
4723
4724extern __inline __m128i
4725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4727{
4728 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4729 (__v16qi)
4730 _mm_setzero_si128 (),
4731 __M);
4732}
4733
4734extern __inline __m128i
4735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4736_mm512_cvtusepi32_epi8 (__m512i __A)
4737{
0b192937
UD
4738 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4739 (__v16qi)
4740 _mm_undefined_si128 (),
756c5857
AI
4741 (__mmask16) -1);
4742}
4743
d256b866
IT
4744extern __inline void
4745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4747{
4748 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4749}
4750
756c5857
AI
4751extern __inline __m128i
4752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4754{
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi) __O,
4757 __M);
4758}
4759
4760extern __inline __m128i
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4763{
4764 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4765 (__v16qi)
4766 _mm_setzero_si128 (),
4767 __M);
4768}
4769
4770extern __inline __m256i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm512_cvtepi32_epi16 (__m512i __A)
4773{
0b192937
UD
4774 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4775 (__v16hi)
4776 _mm256_undefined_si256 (),
756c5857
AI
4777 (__mmask16) -1);
4778}
4779
d256b866
IT
4780extern __inline void
4781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4783{
4784 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4785}
4786
756c5857
AI
4787extern __inline __m256i
4788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4789_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4790{
4791 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4792 (__v16hi) __O, __M);
4793}
4794
4795extern __inline __m256i
4796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4797_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4798{
4799 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4800 (__v16hi)
4801 _mm256_setzero_si256 (),
4802 __M);
4803}
4804
4805extern __inline __m256i
4806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4807_mm512_cvtsepi32_epi16 (__m512i __A)
4808{
0b192937
UD
4809 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4810 (__v16hi)
4811 _mm256_undefined_si256 (),
756c5857
AI
4812 (__mmask16) -1);
4813}
4814
d256b866
IT
4815extern __inline void
4816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4818{
4819 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4820}
4821
756c5857
AI
4822extern __inline __m256i
4823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4824_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4825{
4826 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4827 (__v16hi) __O, __M);
4828}
4829
4830extern __inline __m256i
4831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4832_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4833{
4834 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4835 (__v16hi)
4836 _mm256_setzero_si256 (),
4837 __M);
4838}
4839
4840extern __inline __m256i
4841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4842_mm512_cvtusepi32_epi16 (__m512i __A)
4843{
0b192937
UD
4844 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4845 (__v16hi)
4846 _mm256_undefined_si256 (),
756c5857
AI
4847 (__mmask16) -1);
4848}
4849
d256b866
IT
4850extern __inline void
4851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4853{
4854 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4855}
4856
756c5857
AI
4857extern __inline __m256i
4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4860{
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi) __O,
4863 __M);
4864}
4865
4866extern __inline __m256i
4867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4869{
4870 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4871 (__v16hi)
4872 _mm256_setzero_si256 (),
4873 __M);
4874}
4875
4876extern __inline __m256i
4877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878_mm512_cvtepi64_epi32 (__m512i __A)
4879{
0b192937
UD
4880 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4881 (__v8si)
4882 _mm256_undefined_si256 (),
756c5857
AI
4883 (__mmask8) -1);
4884}
4885
d256b866
IT
4886extern __inline void
4887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4889{
4890 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4891}
4892
756c5857
AI
4893extern __inline __m256i
4894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4895_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4896{
4897 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4898 (__v8si) __O, __M);
4899}
4900
4901extern __inline __m256i
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4904{
4905 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4906 (__v8si)
4907 _mm256_setzero_si256 (),
4908 __M);
4909}
4910
4911extern __inline __m256i
4912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4913_mm512_cvtsepi64_epi32 (__m512i __A)
4914{
0b192937
UD
4915 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4916 (__v8si)
4917 _mm256_undefined_si256 (),
756c5857
AI
4918 (__mmask8) -1);
4919}
4920
d256b866
IT
4921extern __inline void
4922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4924{
4925 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4926}
4927
756c5857
AI
4928extern __inline __m256i
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4931{
4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933 (__v8si) __O, __M);
4934}
4935
4936extern __inline __m256i
4937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4938_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4939{
4940 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4941 (__v8si)
4942 _mm256_setzero_si256 (),
4943 __M);
4944}
4945
4946extern __inline __m256i
4947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4948_mm512_cvtusepi64_epi32 (__m512i __A)
4949{
0b192937
UD
4950 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4951 (__v8si)
4952 _mm256_undefined_si256 (),
756c5857
AI
4953 (__mmask8) -1);
4954}
4955
6fb82517 4956extern __inline void
d256b866
IT
4957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4959{
4960 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4961}
4962
756c5857
AI
4963extern __inline __m256i
4964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4966{
4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968 (__v8si) __O, __M);
4969}
4970
4971extern __inline __m256i
4972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4974{
4975 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4976 (__v8si)
4977 _mm256_setzero_si256 (),
4978 __M);
4979}
4980
4981extern __inline __m128i
4982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4983_mm512_cvtepi64_epi16 (__m512i __A)
4984{
0b192937
UD
4985 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4986 (__v8hi)
4987 _mm_undefined_si128 (),
756c5857
AI
4988 (__mmask8) -1);
4989}
4990
d256b866
IT
4991extern __inline void
4992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4993_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4994{
4995 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4996}
4997
756c5857
AI
4998extern __inline __m128i
4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5001{
5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003 (__v8hi) __O, __M);
5004}
5005
5006extern __inline __m128i
5007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5008_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5009{
5010 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5011 (__v8hi)
5012 _mm_setzero_si128 (),
5013 __M);
5014}
5015
5016extern __inline __m128i
5017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018_mm512_cvtsepi64_epi16 (__m512i __A)
5019{
0b192937
UD
5020 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5021 (__v8hi)
5022 _mm_undefined_si128 (),
756c5857
AI
5023 (__mmask8) -1);
5024}
5025
d256b866
IT
5026extern __inline void
5027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5029{
5030 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5031}
5032
756c5857
AI
5033extern __inline __m128i
5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5036{
5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038 (__v8hi) __O, __M);
5039}
5040
5041extern __inline __m128i
5042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5043_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5044{
5045 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5046 (__v8hi)
5047 _mm_setzero_si128 (),
5048 __M);
5049}
5050
5051extern __inline __m128i
5052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053_mm512_cvtusepi64_epi16 (__m512i __A)
5054{
0b192937
UD
5055 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5056 (__v8hi)
5057 _mm_undefined_si128 (),
756c5857
AI
5058 (__mmask8) -1);
5059}
5060
d256b866
IT
5061extern __inline void
5062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5064{
5065 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5066}
5067
756c5857
AI
5068extern __inline __m128i
5069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5071{
5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073 (__v8hi) __O, __M);
5074}
5075
5076extern __inline __m128i
5077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5079{
5080 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5081 (__v8hi)
5082 _mm_setzero_si128 (),
5083 __M);
5084}
5085
5086extern __inline __m128i
5087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5088_mm512_cvtepi64_epi8 (__m512i __A)
5089{
0b192937
UD
5090 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5091 (__v16qi)
5092 _mm_undefined_si128 (),
756c5857
AI
5093 (__mmask8) -1);
5094}
5095
d256b866
IT
5096extern __inline void
5097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5098_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5099{
5100 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5101}
5102
756c5857
AI
5103extern __inline __m128i
5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5106{
5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108 (__v16qi) __O, __M);
5109}
5110
5111extern __inline __m128i
5112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5113_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5114{
5115 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5116 (__v16qi)
5117 _mm_setzero_si128 (),
5118 __M);
5119}
5120
5121extern __inline __m128i
5122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123_mm512_cvtsepi64_epi8 (__m512i __A)
5124{
0b192937
UD
5125 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5126 (__v16qi)
5127 _mm_undefined_si128 (),
756c5857
AI
5128 (__mmask8) -1);
5129}
5130
d256b866
IT
5131extern __inline void
5132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5134{
5135 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5136}
5137
756c5857
AI
5138extern __inline __m128i
5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5141{
5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143 (__v16qi) __O, __M);
5144}
5145
5146extern __inline __m128i
5147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5148_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5149{
5150 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5151 (__v16qi)
5152 _mm_setzero_si128 (),
5153 __M);
5154}
5155
5156extern __inline __m128i
5157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158_mm512_cvtusepi64_epi8 (__m512i __A)
5159{
0b192937
UD
5160 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5161 (__v16qi)
5162 _mm_undefined_si128 (),
756c5857
AI
5163 (__mmask8) -1);
5164}
5165
d256b866
IT
5166extern __inline void
5167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5169{
5170 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5171}
5172
756c5857
AI
5173extern __inline __m128i
5174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5175_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5176{
5177 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5178 (__v16qi) __O,
5179 __M);
5180}
5181
5182extern __inline __m128i
5183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5185{
5186 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5187 (__v16qi)
5188 _mm_setzero_si128 (),
5189 __M);
5190}
5191
5192extern __inline __m512d
5193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5194_mm512_cvtepi32_pd (__m256i __A)
5195{
5196 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5197 (__v8df)
0b192937 5198 _mm512_undefined_pd (),
756c5857
AI
5199 (__mmask8) -1);
5200}
5201
5202extern __inline __m512d
5203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5204_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5205{
5206 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5207 (__v8df) __W,
5208 (__mmask8) __U);
5209}
5210
5211extern __inline __m512d
5212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5214{
5215 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5216 (__v8df)
5217 _mm512_setzero_pd (),
5218 (__mmask8) __U);
5219}
5220
5221extern __inline __m512d
5222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5223_mm512_cvtepu32_pd (__m256i __A)
5224{
5225 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5226 (__v8df)
0b192937 5227 _mm512_undefined_pd (),
756c5857
AI
5228 (__mmask8) -1);
5229}
5230
5231extern __inline __m512d
5232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5234{
5235 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5236 (__v8df) __W,
5237 (__mmask8) __U);
5238}
5239
5240extern __inline __m512d
5241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5243{
5244 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5245 (__v8df)
5246 _mm512_setzero_pd (),
5247 (__mmask8) __U);
5248}
5249
5250#ifdef __OPTIMIZE__
5251extern __inline __m512
5252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5254{
5255 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5256 (__v16sf)
0b192937 5257 _mm512_undefined_ps (),
756c5857
AI
5258 (__mmask16) -1, __R);
5259}
5260
5261extern __inline __m512
5262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5264 const int __R)
5265{
5266 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5267 (__v16sf) __W,
5268 (__mmask16) __U, __R);
5269}
5270
5271extern __inline __m512
5272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5273_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5274{
5275 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5276 (__v16sf)
5277 _mm512_setzero_ps (),
5278 (__mmask16) __U, __R);
5279}
5280
5281extern __inline __m512
5282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5284{
5285 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5286 (__v16sf)
0b192937 5287 _mm512_undefined_ps (),
756c5857
AI
5288 (__mmask16) -1, __R);
5289}
5290
5291extern __inline __m512
5292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5293_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5294 const int __R)
5295{
5296 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5297 (__v16sf) __W,
5298 (__mmask16) __U, __R);
5299}
5300
5301extern __inline __m512
5302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5304{
5305 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5306 (__v16sf)
5307 _mm512_setzero_ps (),
5308 (__mmask16) __U, __R);
5309}
5310
5311#else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask with an undefined second operand and a
   mask of -1.  The expansion is fully parenthesized so that an operator
   next to the macro call applies to the cast result.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
    (__v16sf) _mm512_undefined_ps (), -1, (B)))
756c5857
AI
5314
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask with W as the second operand and U as
   the mask.  Arguments and the whole expansion are parenthesized so that
   neighbouring operators cannot regroup the expression.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), (W), (U), (B)))
5317
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask with a zeroed second operand and U as
   the mask.  Arguments and the whole expansion are parenthesized so that
   neighbouring operators cannot regroup the expression.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
    (__v16sf) _mm512_setzero_ps (), (U), (B)))
5320
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask with an undefined second operand and a
   mask of -1.  The expansion is fully parenthesized so that an operator
   next to the macro call applies to the cast result.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
    (__v16sf) _mm512_undefined_ps (), -1, (B)))
756c5857
AI
5323
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask with W as the second operand and U as
   the mask.  Arguments and the whole expansion are parenthesized so that
   neighbouring operators cannot regroup the expression.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), (W), (U), (B)))
5326
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask with a zeroed second operand and U as
   the mask.  Arguments and the whole expansion are parenthesized so that
   neighbouring operators cannot regroup the expression.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
    (__v16sf) _mm512_setzero_ps (), (U), (B)))
5329#endif
5330
5331#ifdef __OPTIMIZE__
5332extern __inline __m256d
5333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5334_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5335{
5336 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5337 __imm,
5338 (__v4df)
0b192937 5339 _mm256_undefined_pd (),
756c5857
AI
5340 (__mmask8) -1);
5341}
5342
5343extern __inline __m256d
5344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5346 const int __imm)
5347{
5348 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5349 __imm,
5350 (__v4df) __W,
5351 (__mmask8) __U);
5352}
5353
5354extern __inline __m256d
5355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5357{
5358 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5359 __imm,
5360 (__v4df)
5361 _mm256_setzero_pd (),
5362 (__mmask8) __U);
5363}
5364
5365extern __inline __m128
5366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5367_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5368{
5369 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5370 __imm,
5371 (__v4sf)
0b192937 5372 _mm_undefined_ps (),
756c5857
AI
5373 (__mmask8) -1);
5374}
5375
5376extern __inline __m128
5377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5378_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5379 const int __imm)
5380{
5381 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5382 __imm,
5383 (__v4sf) __W,
5384 (__mmask8) __U);
5385}
5386
5387extern __inline __m128
5388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5390{
5391 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5392 __imm,
5393 (__v4sf)
5394 _mm_setzero_ps (),
5395 (__mmask8) __U);
5396}
5397
5398extern __inline __m256i
5399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5401{
5402 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5403 __imm,
5404 (__v4di)
0b192937 5405 _mm256_undefined_si256 (),
756c5857
AI
5406 (__mmask8) -1);
5407}
5408
5409extern __inline __m256i
5410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5412 const int __imm)
5413{
5414 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5415 __imm,
5416 (__v4di) __W,
5417 (__mmask8) __U);
5418}
5419
5420extern __inline __m256i
5421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5423{
5424 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5425 __imm,
5426 (__v4di)
5427 _mm256_setzero_si256 (),
5428 (__mmask8) __U);
5429}
5430
5431extern __inline __m128i
5432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5434{
5435 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5436 __imm,
5437 (__v4si)
0b192937 5438 _mm_undefined_si128 (),
756c5857
AI
5439 (__mmask8) -1);
5440}
5441
5442extern __inline __m128i
5443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5445 const int __imm)
5446{
5447 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5448 __imm,
5449 (__v4si) __W,
5450 (__mmask8) __U);
5451}
5452
5453extern __inline __m128i
5454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5455_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5456{
5457 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5458 __imm,
5459 (__v4si)
5460 _mm_setzero_si128 (),
5461 (__mmask8) __U);
5462}
5463#else
5464
/* Macro form used when __OPTIMIZE__ is not defined: undefined third
   operand, mask (__mmask8) -1.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df)(__m512d) (X), (int) (C), \
     (__v4df)(__m256d) _mm256_undefined_pd (), (__mmask8) -1))
5470
/* Macro form used when __OPTIMIZE__ is not defined: W is the third
   operand, U the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df)(__m512d) (X), (int) (C), \
     (__v4df)(__m256d) (W), (__mmask8) (U)))
5476
/* Macro form used when __OPTIMIZE__ is not defined: zeroed third operand,
   U the mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df)(__m512d) (X), (int) (C), \
     (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
5482
/* Macro form used when __OPTIMIZE__ is not defined: undefined third
   operand, mask (__mmask8) -1.  */
#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf)(__m512) (X), (int) (C), \
     (__v4sf)(__m128) _mm_undefined_ps (), (__mmask8) -1))
5488
/* Macro form used when __OPTIMIZE__ is not defined: W is the third
   operand, U the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf)(__m512) (X), (int) (C), \
     (__v4sf)(__m128) (W), (__mmask8) (U)))
5494
/* Macro form used when __OPTIMIZE__ is not defined: zeroed third operand,
   U the mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf)(__m512) (X), (int) (C), \
     (__v4sf)(__m128) _mm_setzero_ps (), (__mmask8) (U)))
5500
/* Macro form used when __OPTIMIZE__ is not defined: undefined third
   operand, mask (__mmask8) -1.  */
#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di)(__m512i) (X), (int) (C), \
     (__v4di)(__m256i) _mm256_undefined_si256 (), (__mmask8) -1))
5506
/* Macro form used when __OPTIMIZE__ is not defined: W is the third
   operand, U the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di)(__m512i) (X), (int) (C), \
     (__v4di)(__m256i) (W), (__mmask8) (U)))
5512
/* Macro form used when __OPTIMIZE__ is not defined: zeroed third operand,
   U the mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di)(__m512i) (X), (int) (C), \
     (__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
5518
/* Macro form used when __OPTIMIZE__ is not defined: undefined third
   operand, mask (__mmask8) -1.  */
#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si)(__m512i) (X), (int) (C), \
     (__v4si)(__m128i) _mm_undefined_si128 (), (__mmask8) -1))
5524
/* Macro form used when __OPTIMIZE__ is not defined: W is the third
   operand, U the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si)(__m512i) (X), (int) (C), \
     (__v4si)(__m128i) (W), (__mmask8) (U)))
5530
/* Macro form used when __OPTIMIZE__ is not defined: zeroed third operand,
   U the mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si)(__m512i) (X), (int) (C), \
     (__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
5536#endif
5537
5538#ifdef __OPTIMIZE__
5539extern __inline __m512i
5540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5541_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5542{
5543 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5544 (__v4si) __B,
5545 __imm,
5546 (__v16si) __A, -1);
5547}
5548
5549extern __inline __m512
5550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5552{
5553 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5554 (__v4sf) __B,
5555 __imm,
5556 (__v16sf) __A, -1);
5557}
5558
5559extern __inline __m512i
5560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5562{
5563 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5564 (__v4di) __B,
5565 __imm,
5566 (__v8di)
4271e5cb 5567 _mm512_undefined_epi32 (),
756c5857
AI
5568 (__mmask8) -1);
5569}
5570
5571extern __inline __m512i
5572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5573_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5574 __m256i __B, const int __imm)
5575{
5576 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5577 (__v4di) __B,
5578 __imm,
5579 (__v8di) __W,
5580 (__mmask8) __U);
5581}
5582
5583extern __inline __m512i
5584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5586 const int __imm)
5587{
5588 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5589 (__v4di) __B,
5590 __imm,
5591 (__v8di)
5592 _mm512_setzero_si512 (),
5593 (__mmask8) __U);
5594}
5595
5596extern __inline __m512d
5597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5598_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5599{
5600 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5601 (__v4df) __B,
5602 __imm,
5603 (__v8df)
0b192937 5604 _mm512_undefined_pd (),
756c5857
AI
5605 (__mmask8) -1);
5606}
5607
5608extern __inline __m512d
5609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5611 __m256d __B, const int __imm)
5612{
5613 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5614 (__v4df) __B,
5615 __imm,
5616 (__v8df) __W,
5617 (__mmask8) __U);
5618}
5619
5620extern __inline __m512d
5621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5622_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5623 const int __imm)
5624{
5625 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5626 (__v4df) __B,
5627 __imm,
5628 (__v8df)
5629 _mm512_setzero_pd (),
5630 (__mmask8) __U);
5631}
5632#else
/* Macro forms of the 32x4 inserts, used when __OPTIMIZE__ is not defined.
   The vector operand paired with the all-ones mask is an undefined vector
   rather than a second expansion of X, so X is evaluated exactly once.
   NOTE(review): this relies on the all-ones mask making that operand
   irrelevant to the result, which the 64x4 insert macros in this file
   already assume.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i) _mm512_undefined_epi32 (), (__mmask16)(-1)))
5640
/* Macro forms of the insertf64x4 family.  Each expands to
   __builtin_ia32_insertf64x4_mask; they differ only in the trailing
   vector/mask operands: undefined vector with mask -1, (W, U), or
   zero vector with U.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
5658
/* Macro forms of the inserti64x4 family.  Each expands to
   __builtin_ia32_inserti64x4_mask; they differ only in the trailing
   vector/mask operands: undefined vector with mask -1, (W, U), or
   zero vector with U.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i) _mm512_undefined_epi32 (), \
    (__mmask8) -1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i) _mm512_setzero_si512 (), \
    (__mmask8) (U)))
5676#endif
5677
5678extern __inline __m512d
5679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5680_mm512_loadu_pd (void const *__P)
5681{
c6b0037d 5682 return *(__m512d_u *)__P;
756c5857
AI
5683}
5684
5685extern __inline __m512d
5686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5688{
fc9cf6da 5689 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
5690 (__v8df) __W,
5691 (__mmask8) __U);
5692}
5693
5694extern __inline __m512d
5695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5697{
fc9cf6da 5698 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
5699 (__v8df)
5700 _mm512_setzero_pd (),
5701 (__mmask8) __U);
5702}
5703
5704extern __inline void
5705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5706_mm512_storeu_pd (void *__P, __m512d __A)
5707{
c6b0037d 5708 *(__m512d_u *)__P = __A;
756c5857
AI
5709}
5710
5711extern __inline void
5712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5714{
fc9cf6da 5715 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
5716 (__mmask8) __U);
5717}
5718
5719extern __inline __m512
5720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721_mm512_loadu_ps (void const *__P)
5722{
c6b0037d 5723 return *(__m512_u *)__P;
756c5857
AI
5724}
5725
5726extern __inline __m512
5727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5729{
fc9cf6da 5730 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
5731 (__v16sf) __W,
5732 (__mmask16) __U);
5733}
5734
5735extern __inline __m512
5736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5738{
fc9cf6da 5739 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
5740 (__v16sf)
5741 _mm512_setzero_ps (),
5742 (__mmask16) __U);
5743}
5744
5745extern __inline void
5746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747_mm512_storeu_ps (void *__P, __m512 __A)
5748{
c6b0037d 5749 *(__m512_u *)__P = __A;
756c5857
AI
5750}
5751
5752extern __inline void
5753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5755{
fc9cf6da 5756 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
5757 (__mmask16) __U);
5758}
5759
5760extern __inline __m512i
5761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5763{
fc9cf6da 5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
5765 (__v8di) __W,
5766 (__mmask8) __U);
5767}
5768
5769extern __inline __m512i
5770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5772{
fc9cf6da 5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
5774 (__v8di)
5775 _mm512_setzero_si512 (),
5776 (__mmask8) __U);
5777}
5778
5779extern __inline void
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5782{
fc9cf6da 5783 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
5784 (__mmask8) __U);
5785}
5786
5787extern __inline __m512i
5788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 5789_mm512_loadu_si512 (void const *__P)
756c5857 5790{
c6b0037d 5791 return *(__m512i_u *)__P;
756c5857
AI
5792}
5793
5794extern __inline __m512i
5795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5797{
fc9cf6da 5798 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
5799 (__v16si) __W,
5800 (__mmask16) __U);
5801}
5802
5803extern __inline __m512i
5804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5805_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5806{
fc9cf6da 5807 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
5808 (__v16si)
5809 _mm512_setzero_si512 (),
5810 (__mmask16) __U);
5811}
5812
5813extern __inline void
5814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 5815_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 5816{
c6b0037d 5817 *(__m512i_u *)__P = __A;
756c5857
AI
5818}
5819
5820extern __inline void
5821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5822_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5823{
fc9cf6da 5824 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
5825 (__mmask16) __U);
5826}
5827
5828extern __inline __m512d
5829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830_mm512_permutevar_pd (__m512d __A, __m512i __C)
5831{
5832 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5833 (__v8di) __C,
5834 (__v8df)
0b192937 5835 _mm512_undefined_pd (),
756c5857
AI
5836 (__mmask8) -1);
5837}
5838
5839extern __inline __m512d
5840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5841_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5842{
5843 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5844 (__v8di) __C,
5845 (__v8df) __W,
5846 (__mmask8) __U);
5847}
5848
5849extern __inline __m512d
5850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5851_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5852{
5853 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5854 (__v8di) __C,
5855 (__v8df)
5856 _mm512_setzero_pd (),
5857 (__mmask8) __U);
5858}
5859
5860extern __inline __m512
5861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5862_mm512_permutevar_ps (__m512 __A, __m512i __C)
5863{
5864 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5865 (__v16si) __C,
5866 (__v16sf)
0b192937 5867 _mm512_undefined_ps (),
756c5857
AI
5868 (__mmask16) -1);
5869}
5870
5871extern __inline __m512
5872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5873_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5874{
5875 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5876 (__v16si) __C,
5877 (__v16sf) __W,
5878 (__mmask16) __U);
5879}
5880
5881extern __inline __m512
5882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5884{
5885 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5886 (__v16si) __C,
5887 (__v16sf)
5888 _mm512_setzero_ps (),
5889 (__mmask16) __U);
5890}
5891
5892extern __inline __m512i
5893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5895{
5896 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5897 /* idx */ ,
5898 (__v8di) __A,
5899 (__v8di) __B,
5900 (__mmask8) -1);
5901}
5902
5903extern __inline __m512i
5904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5906 __m512i __B)
5907{
5908 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5909 /* idx */ ,
5910 (__v8di) __A,
5911 (__v8di) __B,
5912 (__mmask8) __U);
5913}
5914
5915extern __inline __m512i
5916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5917_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5918 __mmask8 __U, __m512i __B)
5919{
5920 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5921 (__v8di) __I
5922 /* idx */ ,
5923 (__v8di) __B,
5924 (__mmask8) __U);
5925}
5926
5927extern __inline __m512i
5928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5929_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5930 __m512i __I, __m512i __B)
5931{
5932 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5933 /* idx */ ,
5934 (__v8di) __A,
5935 (__v8di) __B,
5936 (__mmask8) __U);
5937}
5938
5939extern __inline __m512i
5940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5941_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5942{
5943 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5944 /* idx */ ,
5945 (__v16si) __A,
5946 (__v16si) __B,
5947 (__mmask16) -1);
5948}
5949
5950extern __inline __m512i
5951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5953 __m512i __I, __m512i __B)
5954{
5955 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5956 /* idx */ ,
5957 (__v16si) __A,
5958 (__v16si) __B,
5959 (__mmask16) __U);
5960}
5961
5962extern __inline __m512i
5963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5964_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5965 __mmask16 __U, __m512i __B)
5966{
5967 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5968 (__v16si) __I
5969 /* idx */ ,
5970 (__v16si) __B,
5971 (__mmask16) __U);
5972}
5973
5974extern __inline __m512i
5975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5976_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5977 __m512i __I, __m512i __B)
5978{
5979 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5980 /* idx */ ,
5981 (__v16si) __A,
5982 (__v16si) __B,
5983 (__mmask16) __U);
5984}
5985
5986extern __inline __m512d
5987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5988_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5989{
5990 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5991 /* idx */ ,
5992 (__v8df) __A,
5993 (__v8df) __B,
5994 (__mmask8) -1);
5995}
5996
5997extern __inline __m512d
5998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6000 __m512d __B)
6001{
6002 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6003 /* idx */ ,
6004 (__v8df) __A,
6005 (__v8df) __B,
6006 (__mmask8) __U);
6007}
6008
6009extern __inline __m512d
6010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6011_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6012 __m512d __B)
6013{
6014 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6015 (__v8di) __I
6016 /* idx */ ,
6017 (__v8df) __B,
6018 (__mmask8) __U);
6019}
6020
6021extern __inline __m512d
6022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6023_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6024 __m512d __B)
6025{
6026 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6027 /* idx */ ,
6028 (__v8df) __A,
6029 (__v8df) __B,
6030 (__mmask8) __U);
6031}
6032
6033extern __inline __m512
6034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6035_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6036{
6037 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6038 /* idx */ ,
6039 (__v16sf) __A,
6040 (__v16sf) __B,
6041 (__mmask16) -1);
6042}
6043
6044extern __inline __m512
6045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6046_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6047{
6048 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6049 /* idx */ ,
6050 (__v16sf) __A,
6051 (__v16sf) __B,
6052 (__mmask16) __U);
6053}
6054
6055extern __inline __m512
6056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6058 __m512 __B)
6059{
6060 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6061 (__v16si) __I
6062 /* idx */ ,
6063 (__v16sf) __B,
6064 (__mmask16) __U);
6065}
6066
6067extern __inline __m512
6068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6070 __m512 __B)
6071{
6072 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6073 /* idx */ ,
6074 (__v16sf) __A,
6075 (__v16sf) __B,
6076 (__mmask16) __U);
6077}
6078
6079#ifdef __OPTIMIZE__
6080extern __inline __m512d
6081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082_mm512_permute_pd (__m512d __X, const int __C)
6083{
6084 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6085 (__v8df)
0b192937 6086 _mm512_undefined_pd (),
756c5857
AI
6087 (__mmask8) -1);
6088}
6089
6090extern __inline __m512d
6091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6093{
6094 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6095 (__v8df) __W,
6096 (__mmask8) __U);
6097}
6098
6099extern __inline __m512d
6100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6102{
6103 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6104 (__v8df)
6105 _mm512_setzero_pd (),
6106 (__mmask8) __U);
6107}
6108
6109extern __inline __m512
6110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111_mm512_permute_ps (__m512 __X, const int __C)
6112{
6113 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6114 (__v16sf)
0b192937 6115 _mm512_undefined_ps (),
756c5857
AI
6116 (__mmask16) -1);
6117}
6118
6119extern __inline __m512
6120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6122{
6123 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6124 (__v16sf) __W,
6125 (__mmask16) __U);
6126}
6127
6128extern __inline __m512
6129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6131{
6132 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6133 (__v16sf)
6134 _mm512_setzero_ps (),
6135 (__mmask16) __U);
6136}
6137#else
/* Macro forms of the permute_pd family for builds without __OPTIMIZE__.
   All three expand to __builtin_ia32_vpermilpd512_mask and differ only
   in the trailing vector/mask operands.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) (-1)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
6152
/* Macro forms of the permute_ps family for builds without __OPTIMIZE__.
   All three expand to __builtin_ia32_vpermilps512_mask and differ only
   in the trailing vector/mask operands.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), \
    (__mmask16) (-1)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) (W), \
    (__mmask16) (U)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v16sf)(__m512) _mm512_setzero_ps (), \
    (__mmask16) (U)))
6167#endif
6168
6169#ifdef __OPTIMIZE__
6170extern __inline __m512i
6171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172_mm512_permutex_epi64 (__m512i __X, const int __I)
6173{
6174 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6175 (__v8di)
4271e5cb 6176 _mm512_undefined_epi32 (),
756c5857
AI
6177 (__mmask8) (-1));
6178}
6179
6180extern __inline __m512i
6181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6183 __m512i __X, const int __I)
6184{
6185 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6186 (__v8di) __W,
6187 (__mmask8) __M);
6188}
6189
6190extern __inline __m512i
6191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6193{
6194 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6195 (__v8di)
6196 _mm512_setzero_si512 (),
6197 (__mmask8) __M);
6198}
6199
6200extern __inline __m512d
6201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6202_mm512_permutex_pd (__m512d __X, const int __M)
6203{
6204 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6205 (__v8df)
0b192937 6206 _mm512_undefined_pd (),
756c5857
AI
6207 (__mmask8) -1);
6208}
6209
6210extern __inline __m512d
6211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6213{
6214 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6215 (__v8df) __W,
6216 (__mmask8) __U);
6217}
6218
6219extern __inline __m512d
6220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6222{
6223 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6224 (__v8df)
6225 _mm512_setzero_pd (),
6226 (__mmask8) __U);
6227}
6228#else
/* Macro forms of the permutex_pd family for builds without __OPTIMIZE__.
   All three expand to __builtin_ia32_permdf512_mask; M is the immediate
   control and the variants differ only in the trailing vector/mask
   operands.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d) (X), \
    (int) (M), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1))

#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d) (X), \
    (int) (M), \
    (__v8df)(__m512d) (W), (__mmask8) (U)))

#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d) (X), \
    (int) (M), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U)))
6242
/* Macro forms of the permutex_epi64 family for builds without
   __OPTIMIZE__.  All three expand to __builtin_ia32_permdi512_mask; I is
   the immediate control and the variants differ only in the trailing
   vector/mask operands.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i) (X), \
    (int) (I), \
    (__v8di)(__m512i) (_mm512_undefined_epi32 ()), \
    (__mmask8) (-1)))

#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i) (X), \
    (int) (I), \
    (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
    (__mmask8) (M)))

#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i) (X), \
    (int) (I), \
    (__v8di)(__m512i) (W), \
    (__mmask8) (M)))
6262#endif
6263
6264extern __inline __m512i
6265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6266_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6267{
583a9919
KY
6268 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6269 (__v8di) __X,
756c5857
AI
6270 (__v8di)
6271 _mm512_setzero_si512 (),
6272 __M);
6273}
6274
6275extern __inline __m512i
6276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6278{
583a9919
KY
6279 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6280 (__v8di) __X,
756c5857 6281 (__v8di)
4271e5cb 6282 _mm512_undefined_epi32 (),
756c5857
AI
6283 (__mmask8) -1);
6284}
6285
6286extern __inline __m512i
6287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6288_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6289 __m512i __Y)
6290{
583a9919
KY
6291 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6292 (__v8di) __X,
756c5857
AI
6293 (__v8di) __W,
6294 __M);
6295}
6296
6297extern __inline __m512i
6298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6299_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6300{
583a9919
KY
6301 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6302 (__v16si) __X,
756c5857
AI
6303 (__v16si)
6304 _mm512_setzero_si512 (),
6305 __M);
6306}
6307
6308extern __inline __m512i
6309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6310_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6311{
583a9919
KY
6312 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6313 (__v16si) __X,
756c5857 6314 (__v16si)
4271e5cb 6315 _mm512_undefined_epi32 (),
756c5857
AI
6316 (__mmask16) -1);
6317}
6318
6319extern __inline __m512i
6320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6322 __m512i __Y)
6323{
583a9919
KY
6324 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6325 (__v16si) __X,
756c5857
AI
6326 (__v16si) __W,
6327 __M);
6328}
6329
6330extern __inline __m512d
6331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6333{
6334 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6335 (__v8di) __X,
6336 (__v8df)
0b192937 6337 _mm512_undefined_pd (),
756c5857
AI
6338 (__mmask8) -1);
6339}
6340
6341extern __inline __m512d
6342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6343_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6344{
6345 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6346 (__v8di) __X,
6347 (__v8df) __W,
6348 (__mmask8) __U);
6349}
6350
6351extern __inline __m512d
6352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6354{
6355 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6356 (__v8di) __X,
6357 (__v8df)
6358 _mm512_setzero_pd (),
6359 (__mmask8) __U);
6360}
6361
6362extern __inline __m512
6363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6365{
6366 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6367 (__v16si) __X,
6368 (__v16sf)
0b192937 6369 _mm512_undefined_ps (),
756c5857
AI
6370 (__mmask16) -1);
6371}
6372
6373extern __inline __m512
6374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6376{
6377 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6378 (__v16si) __X,
6379 (__v16sf) __W,
6380 (__mmask16) __U);
6381}
6382
6383extern __inline __m512
6384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6385_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6386{
6387 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6388 (__v16si) __X,
6389 (__v16sf)
6390 _mm512_setzero_ps (),
6391 (__mmask16) __U);
6392}
6393
6394#ifdef __OPTIMIZE__
6395extern __inline __m512
6396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6398{
6399 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6400 (__v16sf) __V, __imm,
6401 (__v16sf)
0b192937 6402 _mm512_undefined_ps (),
756c5857
AI
6403 (__mmask16) -1);
6404}
6405
6406extern __inline __m512
6407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6408_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6409 __m512 __V, const int __imm)
6410{
6411 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6412 (__v16sf) __V, __imm,
6413 (__v16sf) __W,
6414 (__mmask16) __U);
6415}
6416
6417extern __inline __m512
6418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6419_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6420{
6421 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6422 (__v16sf) __V, __imm,
6423 (__v16sf)
6424 _mm512_setzero_ps (),
6425 (__mmask16) __U);
6426}
6427
6428extern __inline __m512d
6429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6430_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6431{
6432 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6433 (__v8df) __V, __imm,
6434 (__v8df)
0b192937 6435 _mm512_undefined_pd (),
756c5857
AI
6436 (__mmask8) -1);
6437}
6438
6439extern __inline __m512d
6440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6442 __m512d __V, const int __imm)
6443{
6444 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6445 (__v8df) __V, __imm,
6446 (__v8df) __W,
6447 (__mmask8) __U);
6448}
6449
6450extern __inline __m512d
6451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6452_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6453 const int __imm)
6454{
6455 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6456 (__v8df) __V, __imm,
6457 (__v8df)
6458 _mm512_setzero_pd (),
6459 (__mmask8) __U);
6460}
6461
6462extern __inline __m512d
6463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6465 const int __imm, const int __R)
6466{
6467 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6468 (__v8df) __B,
6469 (__v8di) __C,
6470 __imm,
6471 (__mmask8) -1, __R);
6472}
6473
6474extern __inline __m512d
6475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6476_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6477 __m512i __C, const int __imm, const int __R)
6478{
6479 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6480 (__v8df) __B,
6481 (__v8di) __C,
6482 __imm,
6483 (__mmask8) __U, __R);
6484}
6485
6486extern __inline __m512d
6487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6488_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6489 __m512i __C, const int __imm, const int __R)
6490{
6491 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6492 (__v8df) __B,
6493 (__v8di) __C,
6494 __imm,
6495 (__mmask8) __U, __R);
6496}
6497
6498extern __inline __m512
6499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6500_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6501 const int __imm, const int __R)
6502{
6503 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6504 (__v16sf) __B,
6505 (__v16si) __C,
6506 __imm,
6507 (__mmask16) -1, __R);
6508}
6509
6510extern __inline __m512
6511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6512_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6513 __m512i __C, const int __imm, const int __R)
6514{
6515 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6516 (__v16sf) __B,
6517 (__v16si) __C,
6518 __imm,
6519 (__mmask16) __U, __R);
6520}
6521
6522extern __inline __m512
6523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6524_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6525 __m512i __C, const int __imm, const int __R)
6526{
6527 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6528 (__v16sf) __B,
6529 (__v16si) __C,
6530 __imm,
6531 (__mmask16) __U, __R);
6532}
6533
6534extern __inline __m128d
6535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6536_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6537 const int __imm, const int __R)
6538{
6539 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6540 (__v2df) __B,
6541 (__v2di) __C, __imm,
6542 (__mmask8) -1, __R);
6543}
6544
6545extern __inline __m128d
6546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6548 __m128i __C, const int __imm, const int __R)
6549{
6550 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6551 (__v2df) __B,
6552 (__v2di) __C, __imm,
6553 (__mmask8) __U, __R);
6554}
6555
6556extern __inline __m128d
6557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6559 __m128i __C, const int __imm, const int __R)
6560{
6561 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6562 (__v2df) __B,
6563 (__v2di) __C,
6564 __imm,
6565 (__mmask8) __U, __R);
6566}
6567
6568extern __inline __m128
6569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6571 const int __imm, const int __R)
6572{
6573 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6574 (__v4sf) __B,
6575 (__v4si) __C, __imm,
6576 (__mmask8) -1, __R);
6577}
6578
6579extern __inline __m128
6580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6582 __m128i __C, const int __imm, const int __R)
6583{
6584 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6585 (__v4sf) __B,
6586 (__v4si) __C, __imm,
6587 (__mmask8) __U, __R);
6588}
6589
6590extern __inline __m128
6591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6592_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6593 __m128i __C, const int __imm, const int __R)
6594{
6595 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6596 (__v4sf) __B,
6597 (__v4si) __C, __imm,
6598 (__mmask8) __U, __R);
6599}
6600
6601#else
/* Macro forms of the 512-bit double shuffles.  Each expands to
   __builtin_ia32_shufpd512_mask with IMM converted to int; the plain form
   uses an undefined fourth operand and an all-ones mask, the maskz form a
   zero vector.  */
#define _mm512_shuffle_pd(A, B, IMM) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)_mm512_undefined_pd (), \
     (__mmask8)-1))

#define _mm512_mask_shuffle_pd(W, U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_pd(U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)_mm512_setzero_pd (), \
     (__mmask8)(U)))
6619
/* Macro forms of the 512-bit float shuffles.  Each expands to
   __builtin_ia32_shufps512_mask with IMM converted to int; the plain form
   uses an undefined fourth operand and an all-ones mask, the maskz form a
   zero vector.  */
#define _mm512_shuffle_ps(A, B, IMM) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)_mm512_undefined_ps (), \
     (__mmask16)-1))

#define _mm512_mask_shuffle_ps(W, U, A, B, IMM) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_ps(U, A, B, IMM) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)_mm512_setzero_ps (), \
     (__mmask16)(U)))
6637
/* Macro forms of the 512-bit double fixupimm wrappers.  A, B and T are cast
   to __v8df / __v8df / __v8di, IMM to int, and R is passed through in
   parentheses.  The maskz form uses the _maskz builtin.  */
#define _mm512_fixupimm_round_pd(A, B, T, IMM, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (__v8di)(__m512i)(T), \
     (int)(IMM), \
     (__mmask8)(-1), \
     (R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, T, IMM, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (__v8di)(__m512i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, T, IMM, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (__v8di)(__m512i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))
6652
/* Macro forms of the 512-bit float fixupimm wrappers.  A, B and T are cast
   to __v16sf / __v16sf / __v16si, IMM to int, and R is passed through in
   parentheses.  The maskz form uses the _maskz builtin.  */
#define _mm512_fixupimm_round_ps(A, B, T, IMM, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (__v16si)(__m512i)(T), \
     (int)(IMM), \
     (__mmask16)(-1), \
     (R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, T, IMM, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (__v16si)(__m512i)(T), \
     (int)(IMM), \
     (__mmask16)(U), \
     (R)))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, T, IMM, R) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (__v16si)(__m512i)(T), \
     (int)(IMM), \
     (__mmask16)(U), \
     (R)))
6667
/* Macro forms of the scalar double fixupimm wrappers, mirroring the inline
   definitions of the same names in the other branch of this conditional.  */
#define _mm_fixupimm_round_sd(A, B, T, IMM, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (__v2di)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(-1), \
     (R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, T, IMM, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (__v2di)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))

#define _mm_maskz_fixupimm_round_sd(U, A, B, T, IMM, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (__v2di)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))
6682
/* Macro forms of the scalar float fixupimm wrappers, mirroring the inline
   definitions of the same names in the other branch of this conditional.  */
#define _mm_fixupimm_round_ss(A, B, T, IMM, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (__v4si)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(-1), \
     (R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, T, IMM, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (__v4si)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))

#define _mm_maskz_fixupimm_round_ss(U, A, B, T, IMM, R) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (__v4si)(__m128i)(T), \
     (int)(IMM), \
     (__mmask8)(U), \
     (R)))
6697#endif
6698
6699extern __inline __m512
6700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6701_mm512_movehdup_ps (__m512 __A)
6702{
6703 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6704 (__v16sf)
0b192937 6705 _mm512_undefined_ps (),
756c5857
AI
6706 (__mmask16) -1);
6707}
6708
6709extern __inline __m512
6710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6712{
6713 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6714 (__v16sf) __W,
6715 (__mmask16) __U);
6716}
6717
6718extern __inline __m512
6719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6720_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6721{
6722 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6723 (__v16sf)
6724 _mm512_setzero_ps (),
6725 (__mmask16) __U);
6726}
6727
6728extern __inline __m512
6729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730_mm512_moveldup_ps (__m512 __A)
6731{
6732 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6733 (__v16sf)
0b192937 6734 _mm512_undefined_ps (),
756c5857
AI
6735 (__mmask16) -1);
6736}
6737
6738extern __inline __m512
6739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6740_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6741{
6742 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6745}
6746
6747extern __inline __m512
6748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6750{
6751 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6752 (__v16sf)
6753 _mm512_setzero_ps (),
6754 (__mmask16) __U);
6755}
6756
6757extern __inline __m512i
6758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6759_mm512_or_si512 (__m512i __A, __m512i __B)
6760{
2069d6fc 6761 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
6762}
6763
6764extern __inline __m512i
6765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766_mm512_or_epi32 (__m512i __A, __m512i __B)
6767{
2069d6fc 6768 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
6769}
6770
6771extern __inline __m512i
6772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6773_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6774{
6775 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6776 (__v16si) __B,
6777 (__v16si) __W,
6778 (__mmask16) __U);
6779}
6780
6781extern __inline __m512i
6782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6783_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6784{
6785 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6786 (__v16si) __B,
6787 (__v16si)
6788 _mm512_setzero_si512 (),
6789 (__mmask16) __U);
6790}
6791
6792extern __inline __m512i
6793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794_mm512_or_epi64 (__m512i __A, __m512i __B)
6795{
2069d6fc 6796 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
6797}
6798
6799extern __inline __m512i
6800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6802{
6803 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6804 (__v8di) __B,
6805 (__v8di) __W,
6806 (__mmask8) __U);
6807}
6808
6809extern __inline __m512i
6810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6811_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6812{
6813 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6814 (__v8di) __B,
6815 (__v8di)
6816 _mm512_setzero_si512 (),
6817 (__mmask8) __U);
6818}
6819
6820extern __inline __m512i
6821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6822_mm512_xor_si512 (__m512i __A, __m512i __B)
6823{
2069d6fc 6824 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
6825}
6826
6827extern __inline __m512i
6828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6829_mm512_xor_epi32 (__m512i __A, __m512i __B)
6830{
2069d6fc 6831 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
6832}
6833
6834extern __inline __m512i
6835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6836_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6837{
6838 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6839 (__v16si) __B,
6840 (__v16si) __W,
6841 (__mmask16) __U);
6842}
6843
6844extern __inline __m512i
6845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6846_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6847{
6848 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6849 (__v16si) __B,
6850 (__v16si)
6851 _mm512_setzero_si512 (),
6852 (__mmask16) __U);
6853}
6854
6855extern __inline __m512i
6856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6857_mm512_xor_epi64 (__m512i __A, __m512i __B)
6858{
2069d6fc 6859 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
6860}
6861
6862extern __inline __m512i
6863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6865{
6866 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6867 (__v8di) __B,
6868 (__v8di) __W,
6869 (__mmask8) __U);
6870}
6871
6872extern __inline __m512i
6873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6874_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6875{
6876 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6877 (__v8di) __B,
6878 (__v8di)
6879 _mm512_setzero_si512 (),
6880 (__mmask8) __U);
6881}
6882
6883#ifdef __OPTIMIZE__
6884extern __inline __m512i
6885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886_mm512_rol_epi32 (__m512i __A, const int __B)
6887{
6888 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6889 (__v16si)
4271e5cb 6890 _mm512_undefined_epi32 (),
756c5857
AI
6891 (__mmask16) -1);
6892}
6893
6894extern __inline __m512i
6895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6896_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6897{
6898 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6899 (__v16si) __W,
6900 (__mmask16) __U);
6901}
6902
6903extern __inline __m512i
6904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6905_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6906{
6907 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6908 (__v16si)
6909 _mm512_setzero_si512 (),
6910 (__mmask16) __U);
6911}
6912
6913extern __inline __m512i
6914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6915_mm512_ror_epi32 (__m512i __A, int __B)
6916{
6917 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6918 (__v16si)
4271e5cb 6919 _mm512_undefined_epi32 (),
756c5857
AI
6920 (__mmask16) -1);
6921}
6922
6923extern __inline __m512i
6924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6925_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6926{
6927 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6928 (__v16si) __W,
6929 (__mmask16) __U);
6930}
6931
6932extern __inline __m512i
6933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6934_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6935{
6936 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6937 (__v16si)
6938 _mm512_setzero_si512 (),
6939 (__mmask16) __U);
6940}
6941
6942extern __inline __m512i
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm512_rol_epi64 (__m512i __A, const int __B)
6945{
6946 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6947 (__v8di)
4271e5cb 6948 _mm512_undefined_epi32 (),
756c5857
AI
6949 (__mmask8) -1);
6950}
6951
6952extern __inline __m512i
6953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6954_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6955{
6956 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6957 (__v8di) __W,
6958 (__mmask8) __U);
6959}
6960
6961extern __inline __m512i
6962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6964{
6965 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6966 (__v8di)
6967 _mm512_setzero_si512 (),
6968 (__mmask8) __U);
6969}
6970
6971extern __inline __m512i
6972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6973_mm512_ror_epi64 (__m512i __A, int __B)
6974{
6975 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6976 (__v8di)
4271e5cb 6977 _mm512_undefined_epi32 (),
756c5857
AI
6978 (__mmask8) -1);
6979}
6980
6981extern __inline __m512i
6982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6983_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6984{
6985 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6986 (__v8di) __W,
6987 (__mmask8) __U);
6988}
6989
6990extern __inline __m512i
6991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6993{
6994 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6995 (__v8di)
6996 _mm512_setzero_si512 (),
6997 (__mmask8) __U);
6998}
6999
7000#else
/* Macro forms of the 32-bit rol wrappers, used when __OPTIMIZE__ is not
   defined.  N is converted to int before reaching the builtin.  */
#define _mm512_rol_epi32(V, N) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)_mm512_undefined_epi32 (), \
     (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, V, N) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, V, N) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro forms of the 32-bit ror wrappers, used when __OPTIMIZE__ is not
   defined.  N is converted to int before reaching the builtin.  */
#define _mm512_ror_epi32(V, N) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)_mm512_undefined_epi32 (), \
     (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, V, N) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, V, N) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(V), \
     (int)(N), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro forms of the 64-bit rol wrappers, used when __OPTIMIZE__ is not
   defined.  N is converted to int before reaching the builtin.  */
#define _mm512_rol_epi64(V, N) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)_mm512_undefined_epi32 (), \
     (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, V, N) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, V, N) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7046
/* Macro forms of the 64-bit ror wrappers, used when __OPTIMIZE__ is not
   defined.  N is converted to int before reaching the builtin.  */
#define _mm512_ror_epi64(V, N) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)_mm512_undefined_epi32 (), \
     (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, V, N) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, V, N) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(V), \
     (int)(N), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7062#endif
7063
7064extern __inline __m512i
7065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7066_mm512_and_si512 (__m512i __A, __m512i __B)
7067{
2069d6fc 7068 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7069}
7070
7071extern __inline __m512i
7072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7073_mm512_and_epi32 (__m512i __A, __m512i __B)
7074{
2069d6fc 7075 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7076}
7077
7078extern __inline __m512i
7079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7080_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7081{
7082 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7083 (__v16si) __B,
7084 (__v16si) __W,
7085 (__mmask16) __U);
7086}
7087
7088extern __inline __m512i
7089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7091{
7092 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7093 (__v16si) __B,
7094 (__v16si)
7095 _mm512_setzero_si512 (),
7096 (__mmask16) __U);
7097}
7098
7099extern __inline __m512i
7100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101_mm512_and_epi64 (__m512i __A, __m512i __B)
7102{
2069d6fc 7103 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7104}
7105
7106extern __inline __m512i
7107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7108_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7109{
7110 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7111 (__v8di) __B,
7112 (__v8di) __W, __U);
7113}
7114
7115extern __inline __m512i
7116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7118{
7119 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7120 (__v8di) __B,
7121 (__v8di)
7122 _mm512_setzero_pd (),
7123 __U);
7124}
7125
7126extern __inline __m512i
7127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7128_mm512_andnot_si512 (__m512i __A, __m512i __B)
7129{
7130 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7131 (__v16si) __B,
7132 (__v16si)
4271e5cb 7133 _mm512_undefined_epi32 (),
756c5857
AI
7134 (__mmask16) -1);
7135}
7136
7137extern __inline __m512i
7138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7139_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7140{
7141 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7142 (__v16si) __B,
7143 (__v16si)
4271e5cb 7144 _mm512_undefined_epi32 (),
756c5857
AI
7145 (__mmask16) -1);
7146}
7147
7148extern __inline __m512i
7149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7150_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7151{
7152 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7153 (__v16si) __B,
7154 (__v16si) __W,
7155 (__mmask16) __U);
7156}
7157
7158extern __inline __m512i
7159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7160_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7161{
7162 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7163 (__v16si) __B,
7164 (__v16si)
7165 _mm512_setzero_si512 (),
7166 (__mmask16) __U);
7167}
7168
7169extern __inline __m512i
7170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7171_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7172{
7173 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7174 (__v8di) __B,
7175 (__v8di)
4271e5cb 7176 _mm512_undefined_epi32 (),
756c5857
AI
7177 (__mmask8) -1);
7178}
7179
7180extern __inline __m512i
7181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7182_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7183{
7184 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7185 (__v8di) __B,
7186 (__v8di) __W, __U);
7187}
7188
7189extern __inline __m512i
7190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7191_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7192{
7193 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7194 (__v8di) __B,
7195 (__v8di)
7196 _mm512_setzero_pd (),
7197 __U);
7198}
7199
7200extern __inline __mmask16
7201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7202_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7203{
7204 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7205 (__v16si) __B,
7206 (__mmask16) -1);
7207}
7208
7209extern __inline __mmask16
7210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7212{
7213 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7214 (__v16si) __B, __U);
7215}
7216
7217extern __inline __mmask8
7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7220{
7221 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7222 (__v8di) __B,
7223 (__mmask8) -1);
7224}
7225
7226extern __inline __mmask8
7227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7229{
7230 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7231}
7232
260d3642
IT
7233extern __inline __mmask16
7234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7236{
7237 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7238 (__v16si) __B,
7239 (__mmask16) -1);
7240}
7241
7242extern __inline __mmask16
7243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7245{
7246 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7247 (__v16si) __B, __U);
7248}
7249
7250extern __inline __mmask8
7251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7253{
7254 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7255 (__v8di) __B,
7256 (__mmask8) -1);
7257}
7258
7259extern __inline __mmask8
7260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7262{
7263 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7264 (__v8di) __B, __U);
7265}
7266
756c5857
AI
7267extern __inline __m512i
7268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7270{
7271 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7272 (__v16si) __B,
7273 (__v16si)
4271e5cb 7274 _mm512_undefined_epi32 (),
756c5857
AI
7275 (__mmask16) -1);
7276}
7277
7278extern __inline __m512i
7279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7281 __m512i __B)
7282{
7283 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7284 (__v16si) __B,
7285 (__v16si) __W,
7286 (__mmask16) __U);
7287}
7288
7289extern __inline __m512i
7290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7292{
7293 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7294 (__v16si) __B,
7295 (__v16si)
7296 _mm512_setzero_si512 (),
7297 (__mmask16) __U);
7298}
7299
7300extern __inline __m512i
7301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7303{
7304 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7305 (__v8di) __B,
7306 (__v8di)
4271e5cb 7307 _mm512_undefined_epi32 (),
756c5857
AI
7308 (__mmask8) -1);
7309}
7310
7311extern __inline __m512i
7312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7314{
7315 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7316 (__v8di) __B,
7317 (__v8di) __W,
7318 (__mmask8) __U);
7319}
7320
7321extern __inline __m512i
7322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7324{
7325 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7326 (__v8di) __B,
7327 (__v8di)
7328 _mm512_setzero_si512 (),
7329 (__mmask8) __U);
7330}
7331
7332extern __inline __m512i
7333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7335{
7336 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7337 (__v16si) __B,
7338 (__v16si)
4271e5cb 7339 _mm512_undefined_epi32 (),
756c5857
AI
7340 (__mmask16) -1);
7341}
7342
7343extern __inline __m512i
7344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7346 __m512i __B)
7347{
7348 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7349 (__v16si) __B,
7350 (__v16si) __W,
7351 (__mmask16) __U);
7352}
7353
7354extern __inline __m512i
7355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7356_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7357{
7358 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7359 (__v16si) __B,
7360 (__v16si)
7361 _mm512_setzero_si512 (),
7362 (__mmask16) __U);
7363}
7364
7365extern __inline __m512i
7366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7367_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7368{
7369 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7370 (__v8di) __B,
7371 (__v8di)
4271e5cb 7372 _mm512_undefined_epi32 (),
756c5857
AI
7373 (__mmask8) -1);
7374}
7375
7376extern __inline __m512i
7377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7378_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7379{
7380 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7381 (__v8di) __B,
7382 (__v8di) __W,
7383 (__mmask8) __U);
7384}
7385
7386extern __inline __m512i
7387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7388_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7389{
7390 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7391 (__v8di) __B,
7392 (__v8di)
7393 _mm512_setzero_si512 (),
7394 (__mmask8) __U);
7395}
7396
7397#ifdef __x86_64__
7398#ifdef __OPTIMIZE__
7399extern __inline unsigned long long
7400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7402{
7403 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7404}
7405
7406extern __inline long long
7407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7409{
7410 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7411}
7412
7413extern __inline long long
7414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7415_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7416{
7417 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7418}
7419
7420extern __inline unsigned long long
7421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7422_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7423{
7424 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7425}
7426
7427extern __inline long long
7428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7429_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7430{
7431 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7432}
7433
7434extern __inline long long
7435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7436_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7437{
7438 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7439}
7440#else
/* Macro forms of the float -> 64-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  Each argument is parenthesised and
   converted exactly as in the inline definitions (A to __v4sf through
   __m128, B to int), so both forms accept the same operands and complex
   argument expressions expand safely.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvtss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) \
   __builtin_ia32_vcvttss2usi64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((__v4sf)(__m128)(A), (int)(B)))
7458#endif
7459#endif
7460
7461#ifdef __OPTIMIZE__
7462extern __inline unsigned
7463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7464_mm_cvt_roundss_u32 (__m128 __A, const int __R)
7465{
7466 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7467}
7468
7469extern __inline int
7470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471_mm_cvt_roundss_si32 (__m128 __A, const int __R)
7472{
7473 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7474}
7475
7476extern __inline int
7477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7478_mm_cvt_roundss_i32 (__m128 __A, const int __R)
7479{
7480 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7481}
7482
7483extern __inline unsigned
7484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7485_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7486{
7487 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7488}
7489
7490extern __inline int
7491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7492_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7493{
7494 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7495}
7496
7497extern __inline int
7498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7500{
7501 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7502}
7503#else
/* Macro forms of the float -> 32-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  Each argument is parenthesised and
   converted exactly as in the inline definitions (A to __v4sf through
   __m128, B to int), so both forms accept the same operands and complex
   argument expressions expand safely.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((__v4sf)(__m128)(A), (int)(B)))
7521#endif
7522
7523#ifdef __x86_64__
7524#ifdef __OPTIMIZE__
7525extern __inline unsigned long long
7526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7528{
7529 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7530}
7531
7532extern __inline long long
7533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7534_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7535{
7536 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7537}
7538
7539extern __inline long long
7540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7542{
7543 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7544}
7545
7546extern __inline unsigned long long
7547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7549{
7550 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7551}
7552
7553extern __inline long long
7554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7555_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7556{
7557 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7558}
7559
7560extern __inline long long
7561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7563{
7564 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7565}
7566#else
/* Macro counterparts of the _mm_cvt[t]_roundsd_{u64,si64,i64} inline
   functions, used when __OPTIMIZE__ is not defined.  Each expands to the
   matching builtin call cast to the return type; the _si64 and _i64
   spellings expand identically.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7584#endif
7585#endif
7586
7587#ifdef __OPTIMIZE__
7588extern __inline unsigned
7589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7590_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7591{
7592 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7593}
7594
7595extern __inline int
7596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7598{
7599 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7600}
7601
7602extern __inline int
7603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7605{
7606 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7607}
7608
7609extern __inline unsigned
7610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7611_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7612{
7613 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7614}
7615
7616extern __inline int
7617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7619{
7620 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7621}
7622
7623extern __inline int
7624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7626{
7627 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7628}
7629#else
/* Macro counterparts of the _mm_cvt[t]_roundsd_{u32,si32,i32} inline
   functions, used when __OPTIMIZE__ is not defined.  Each expands to the
   matching builtin call cast to the return type; the _si32 and _i32
   spellings expand identically.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7647#endif
7648
7649extern __inline __m512d
7650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7651_mm512_movedup_pd (__m512d __A)
7652{
7653 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7654 (__v8df)
0b192937 7655 _mm512_undefined_pd (),
756c5857
AI
7656 (__mmask8) -1);
7657}
7658
7659extern __inline __m512d
7660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7661_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7662{
7663 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7664 (__v8df) __W,
7665 (__mmask8) __U);
7666}
7667
7668extern __inline __m512d
7669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7670_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7671{
7672 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7673 (__v8df)
7674 _mm512_setzero_pd (),
7675 (__mmask8) __U);
7676}
7677
7678extern __inline __m512d
7679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7680_mm512_unpacklo_pd (__m512d __A, __m512d __B)
7681{
7682 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7683 (__v8df) __B,
7684 (__v8df)
0b192937 7685 _mm512_undefined_pd (),
756c5857
AI
7686 (__mmask8) -1);
7687}
7688
7689extern __inline __m512d
7690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7691_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7692{
7693 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7694 (__v8df) __B,
7695 (__v8df) __W,
7696 (__mmask8) __U);
7697}
7698
7699extern __inline __m512d
7700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7701_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7702{
7703 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7704 (__v8df) __B,
7705 (__v8df)
7706 _mm512_setzero_pd (),
7707 (__mmask8) __U);
7708}
7709
7710extern __inline __m512d
7711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7712_mm512_unpackhi_pd (__m512d __A, __m512d __B)
7713{
7714 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7715 (__v8df) __B,
7716 (__v8df)
0b192937 7717 _mm512_undefined_pd (),
756c5857
AI
7718 (__mmask8) -1);
7719}
7720
7721extern __inline __m512d
7722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7724{
7725 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7726 (__v8df) __B,
7727 (__v8df) __W,
7728 (__mmask8) __U);
7729}
7730
7731extern __inline __m512d
7732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7733_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7734{
7735 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7736 (__v8df) __B,
7737 (__v8df)
7738 _mm512_setzero_pd (),
7739 (__mmask8) __U);
7740}
7741
7742extern __inline __m512
7743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744_mm512_unpackhi_ps (__m512 __A, __m512 __B)
7745{
7746 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7747 (__v16sf) __B,
7748 (__v16sf)
0b192937 7749 _mm512_undefined_ps (),
756c5857
AI
7750 (__mmask16) -1);
7751}
7752
7753extern __inline __m512
7754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7756{
7757 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7758 (__v16sf) __B,
7759 (__v16sf) __W,
7760 (__mmask16) __U);
7761}
7762
7763extern __inline __m512
7764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7766{
7767 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7768 (__v16sf) __B,
7769 (__v16sf)
7770 _mm512_setzero_ps (),
7771 (__mmask16) __U);
7772}
7773
7774#ifdef __OPTIMIZE__
7775extern __inline __m512d
7776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777_mm512_cvt_roundps_pd (__m256 __A, const int __R)
7778{
7779 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7780 (__v8df)
0b192937 7781 _mm512_undefined_pd (),
756c5857
AI
7782 (__mmask8) -1, __R);
7783}
7784
7785extern __inline __m512d
7786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7788 const int __R)
7789{
7790 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7791 (__v8df) __W,
7792 (__mmask8) __U, __R);
7793}
7794
7795extern __inline __m512d
7796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7797_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7798{
7799 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7800 (__v8df)
7801 _mm512_setzero_pd (),
7802 (__mmask8) __U, __R);
7803}
7804
7805extern __inline __m512
7806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7807_mm512_cvt_roundph_ps (__m256i __A, const int __R)
7808{
7809 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7810 (__v16sf)
0b192937 7811 _mm512_undefined_ps (),
756c5857
AI
7812 (__mmask16) -1, __R);
7813}
7814
7815extern __inline __m512
7816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7817_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7818 const int __R)
7819{
7820 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7821 (__v16sf) __W,
7822 (__mmask16) __U, __R);
7823}
7824
7825extern __inline __m512
7826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7828{
7829 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7830 (__v16sf)
7831 _mm512_setzero_ps (),
7832 (__mmask16) __U, __R);
7833}
7834
7835extern __inline __m256i
7836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837_mm512_cvt_roundps_ph (__m512 __A, const int __I)
7838{
7839 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7840 __I,
7841 (__v16hi)
0b192937 7842 _mm256_undefined_si256 (),
756c5857
AI
7843 -1);
7844}
7845
7846extern __inline __m256i
7847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848_mm512_cvtps_ph (__m512 __A, const int __I)
7849{
7850 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7851 __I,
7852 (__v16hi)
0b192937 7853 _mm256_undefined_si256 (),
756c5857
AI
7854 -1);
7855}
7856
7857extern __inline __m256i
7858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7859_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7860 const int __I)
7861{
7862 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7863 __I,
7864 (__v16hi) __U,
7865 (__mmask16) __W);
7866}
7867
7868extern __inline __m256i
7869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7871{
7872 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7873 __I,
7874 (__v16hi) __U,
7875 (__mmask16) __W);
7876}
7877
7878extern __inline __m256i
7879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7881{
7882 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7883 __I,
7884 (__v16hi)
7885 _mm256_setzero_si256 (),
7886 (__mmask16) __W);
7887}
7888
7889extern __inline __m256i
7890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7891_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7892{
7893 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7894 __I,
7895 (__v16hi)
7896 _mm256_setzero_si256 (),
7897 (__mmask16) __W);
7898}
7899#else
/* Macro counterparts of the inline functions above, used when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in an outer
   pair of parentheses and every macro argument is parenthesized, so the
   leading cast cannot re-associate with whatever follows the macro call
   and a compound argument is cast as a whole, not just its first
   operand.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_setzero_ps (), (U), (B)))

#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

/* In the two mask forms below U is the vector operand and W the mask,
   matching the parameter naming of the inline versions.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7936#endif
7937
7938#ifdef __OPTIMIZE__
7939extern __inline __m256
7940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7941_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7942{
7943 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7944 (__v8sf)
0b192937 7945 _mm256_undefined_ps (),
756c5857
AI
7946 (__mmask8) -1, __R);
7947}
7948
7949extern __inline __m256
7950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7951_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7952 const int __R)
7953{
7954 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7955 (__v8sf) __W,
7956 (__mmask8) __U, __R);
7957}
7958
7959extern __inline __m256
7960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7961_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7962{
7963 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7964 (__v8sf)
7965 _mm256_setzero_ps (),
7966 (__mmask8) __U, __R);
7967}
7968
075691af
AI
7969extern __inline __m128
7970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7972{
7973 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7974 (__v2df) __B,
7975 __R);
7976}
7977
7978extern __inline __m128d
7979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7980_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7981{
7982 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7983 (__v4sf) __B,
7984 __R);
7985}
756c5857
AI
7986#else
/* Macro counterparts of the inline functions above, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer
   pair of parentheses so the leading cast cannot re-associate with
   whatever follows the macro call (for example a subscript), and every
   macro argument is parenthesized.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
756c5857
AI
8001#endif
8002
8003extern __inline void
8004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8005_mm512_stream_si512 (__m512i * __P, __m512i __A)
8006{
8007 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8008}
8009
8010extern __inline void
8011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8012_mm512_stream_ps (float *__P, __m512 __A)
8013{
8014 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8015}
8016
8017extern __inline void
8018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019_mm512_stream_pd (double *__P, __m512d __A)
8020{
8021 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8022}
8023
c56a42b9
KY
8024extern __inline __m512i
8025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026_mm512_stream_load_si512 (void *__P)
8027{
8028 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8029}
8030
9c3c2608
UB
/* Constants for mantissa extraction: selects the interval the extracted
   mantissa is normalized to.  The getmant wrappers in this file place
   this value in the low two bits of the control operand.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8039
/* Sign-control selector for mantissa extraction.  The getmant wrappers in
   this file shift this value left by two before OR-ing it with the
   normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8046
756c5857 8047#ifdef __OPTIMIZE__
075691af
AI
8048extern __inline __m128
8049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8050_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8051{
8052 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8053 (__v4sf) __B,
8054 __R);
8055}
8056
8057extern __inline __m128d
8058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8059_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8060{
8061 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8062 (__v2df) __B,
8063 __R);
8064}
8065
756c5857
AI
8066extern __inline __m512
8067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068_mm512_getexp_round_ps (__m512 __A, const int __R)
8069{
8070 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8071 (__v16sf)
0b192937 8072 _mm512_undefined_ps (),
756c5857
AI
8073 (__mmask16) -1, __R);
8074}
8075
8076extern __inline __m512
8077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8078_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8079 const int __R)
8080{
8081 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8082 (__v16sf) __W,
8083 (__mmask16) __U, __R);
8084}
8085
8086extern __inline __m512
8087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8088_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8089{
8090 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8091 (__v16sf)
8092 _mm512_setzero_ps (),
8093 (__mmask16) __U, __R);
8094}
8095
8096extern __inline __m512d
8097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8098_mm512_getexp_round_pd (__m512d __A, const int __R)
8099{
8100 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8101 (__v8df)
0b192937 8102 _mm512_undefined_pd (),
756c5857
AI
8103 (__mmask8) -1, __R);
8104}
8105
8106extern __inline __m512d
8107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8108_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8109 const int __R)
8110{
8111 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8112 (__v8df) __W,
8113 (__mmask8) __U, __R);
8114}
8115
8116extern __inline __m512d
8117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8119{
8120 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8121 (__v8df)
8122 _mm512_setzero_pd (),
8123 (__mmask8) __U, __R);
8124}
8125
756c5857
AI
8126extern __inline __m512d
8127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8129 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8130{
8131 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8132 (__C << 2) | __B,
0b192937 8133 _mm512_undefined_pd (),
756c5857
AI
8134 (__mmask8) -1, __R);
8135}
8136
8137extern __inline __m512d
8138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8140 _MM_MANTISSA_NORM_ENUM __B,
8141 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8142{
8143 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8144 (__C << 2) | __B,
8145 (__v8df) __W, __U,
8146 __R);
8147}
8148
8149extern __inline __m512d
8150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8151_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8152 _MM_MANTISSA_NORM_ENUM __B,
8153 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8154{
8155 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8156 (__C << 2) | __B,
8157 (__v8df)
8158 _mm512_setzero_pd (),
8159 __U, __R);
8160}
8161
8162extern __inline __m512
8163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8164_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8165 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8166{
8167 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8168 (__C << 2) | __B,
0b192937 8169 _mm512_undefined_ps (),
756c5857
AI
8170 (__mmask16) -1, __R);
8171}
8172
8173extern __inline __m512
8174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8175_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8176 _MM_MANTISSA_NORM_ENUM __B,
8177 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8178{
8179 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8180 (__C << 2) | __B,
8181 (__v16sf) __W, __U,
8182 __R);
8183}
8184
8185extern __inline __m512
8186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8188 _MM_MANTISSA_NORM_ENUM __B,
8189 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8190{
8191 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8192 (__C << 2) | __B,
8193 (__v16sf)
8194 _mm512_setzero_ps (),
8195 __U, __R);
8196}
8197
075691af
AI
8198extern __inline __m128d
8199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200_mm_getmant_round_sd (__m128d __A, __m128d __B,
8201 _MM_MANTISSA_NORM_ENUM __C,
8202 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8203{
8204 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8205 (__v2df) __B,
8206 (__D << 2) | __C,
8207 __R);
8208}
8209
8210extern __inline __m128
8211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212_mm_getmant_round_ss (__m128 __A, __m128 __B,
8213 _MM_MANTISSA_NORM_ENUM __C,
8214 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8215{
8216 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8217 (__v4sf) __B,
8218 (__D << 2) | __C,
8219 __R);
8220}
8221
756c5857
AI
8222#else
/* Macro counterparts of the getmant/getexp inline functions above, used
   when __OPTIMIZE__ is not defined.  The getmant control operand is
   always ((sign selector) << 2) | (normalization selector).  */

/* 512-bit getmant, double precision.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U), (R)))

/* 512-bit getmant, single precision.  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U), (R)))

/* 128-bit getmant forms.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), (R)))

/* 128-bit getexp forms.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), (R)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), (R)))

/* 512-bit getexp, single precision.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
     (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
     (__v16sf) (__m512) (W), (__mmask16) (U), (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
     (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))

/* 512-bit getexp, double precision.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
     (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
     (__v8df) (__m512d) (W), (__mmask8) (U), (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
     (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
8304#endif
8305
8306#ifdef __OPTIMIZE__
8307extern __inline __m512
8308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8309_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8310{
8311 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
8312 (__v16sf)
8313 _mm512_undefined_ps (),
8314 -1, __R);
756c5857
AI
8315}
8316
8317extern __inline __m512
8318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8319_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8320 const int __imm, const int __R)
8321{
8322 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8323 (__v16sf) __A,
8324 (__mmask16) __B, __R);
8325}
8326
8327extern __inline __m512
8328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8329_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8330 const int __imm, const int __R)
8331{
8332 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8333 __imm,
8334 (__v16sf)
8335 _mm512_setzero_ps (),
8336 (__mmask16) __A, __R);
8337}
8338
8339extern __inline __m512d
8340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8341_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8342{
8343 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
8344 (__v8df)
8345 _mm512_undefined_pd (),
8346 -1, __R);
756c5857
AI
8347}
8348
8349extern __inline __m512d
8350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8351_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8352 __m512d __C, const int __imm, const int __R)
8353{
8354 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8355 (__v8df) __A,
8356 (__mmask8) __B, __R);
8357}
8358
8359extern __inline __m512d
8360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8361_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8362 const int __imm, const int __R)
8363{
8364 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8365 __imm,
8366 (__v8df)
8367 _mm512_setzero_pd (),
8368 (__mmask8) __A, __R);
8369}
075691af
AI
8370
8371extern __inline __m128
8372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8374{
8375 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8376 (__v4sf) __B, __imm, __R);
8377}
8378
8379extern __inline __m128d
8380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8381_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8382 const int __R)
8383{
8384 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8385 (__v2df) __B, __imm, __R);
8386}
8387
756c5857
AI
8388#else
/* Macro counterparts of the roundscale inline functions above, used when
   __OPTIMIZE__ is not defined.  Argument roles mirror the inline
   versions: in the mask forms A is the vector operand, B the mask, C the
   source and D the immediate; in the maskz forms A is the mask, B the
   source and C the immediate.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
     (int) (B), (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) (-1), (R)))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
     (int) (D), (__v16sf) (__m512) (A), \
     (__mmask16) (B), (R)))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
     (int) (C), (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (A), (R)))

#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
     (int) (B), (__v8df) _mm512_undefined_pd (), \
     (__mmask8) (-1), (R)))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
     (int) (D), (__v8df) (__m512d) (A), \
     (__mmask8) (B), (R)))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
     (int) (C), (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (A), (R)))

#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), (int) (C), (R)))

#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), (int) (C), (R)))
756c5857
AI
8421#endif
8422
8423extern __inline __m512
8424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8425_mm512_floor_ps (__m512 __A)
8426{
8427 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8428 _MM_FROUND_FLOOR,
8429 (__v16sf) __A, -1,
8430 _MM_FROUND_CUR_DIRECTION);
8431}
8432
8433extern __inline __m512d
8434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8435_mm512_floor_pd (__m512d __A)
8436{
8437 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8438 _MM_FROUND_FLOOR,
8439 (__v8df) __A, -1,
8440 _MM_FROUND_CUR_DIRECTION);
8441}
8442
8443extern __inline __m512
8444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445_mm512_ceil_ps (__m512 __A)
8446{
8447 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8448 _MM_FROUND_CEIL,
8449 (__v16sf) __A, -1,
8450 _MM_FROUND_CUR_DIRECTION);
8451}
8452
8453extern __inline __m512d
8454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8455_mm512_ceil_pd (__m512d __A)
8456{
8457 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8458 _MM_FROUND_CEIL,
8459 (__v8df) __A, -1,
8460 _MM_FROUND_CUR_DIRECTION);
8461}
8462
8463extern __inline __m512
8464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8466{
8467 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8468 _MM_FROUND_FLOOR,
8469 (__v16sf) __W, __U,
8470 _MM_FROUND_CUR_DIRECTION);
8471}
8472
8473extern __inline __m512d
8474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8476{
8477 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8478 _MM_FROUND_FLOOR,
8479 (__v8df) __W, __U,
8480 _MM_FROUND_CUR_DIRECTION);
8481}
8482
8483extern __inline __m512
8484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8486{
8487 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8488 _MM_FROUND_CEIL,
8489 (__v16sf) __W, __U,
8490 _MM_FROUND_CUR_DIRECTION);
8491}
8492
8493extern __inline __m512d
8494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8495_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8496{
8497 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8498 _MM_FROUND_CEIL,
8499 (__v8df) __W, __U,
8500 _MM_FROUND_CUR_DIRECTION);
8501}
8502
756c5857 8503#ifdef __OPTIMIZE__
756c5857
AI
8504extern __inline __m512i
8505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8506_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8507{
8508 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8509 (__v16si) __B, __imm,
8510 (__v16si)
4271e5cb 8511 _mm512_undefined_epi32 (),
756c5857
AI
8512 (__mmask16) -1);
8513}
8514
8515extern __inline __m512i
8516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8517_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8518 __m512i __B, const int __imm)
8519{
8520 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8521 (__v16si) __B, __imm,
8522 (__v16si) __W,
8523 (__mmask16) __U);
8524}
8525
8526extern __inline __m512i
8527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8529 const int __imm)
8530{
8531 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8532 (__v16si) __B, __imm,
8533 (__v16si)
8534 _mm512_setzero_si512 (),
8535 (__mmask16) __U);
8536}
8537
8538extern __inline __m512i
8539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8541{
8542 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8543 (__v8di) __B, __imm,
8544 (__v8di)
4271e5cb 8545 _mm512_undefined_epi32 (),
756c5857
AI
8546 (__mmask8) -1);
8547}
8548
8549extern __inline __m512i
8550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8552 __m512i __B, const int __imm)
8553{
8554 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8555 (__v8di) __B, __imm,
8556 (__v8di) __W,
8557 (__mmask8) __U);
8558}
8559
8560extern __inline __m512i
8561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8562_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8563 const int __imm)
8564{
8565 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8566 (__v8di) __B, __imm,
8567 (__v8di)
8568 _mm512_setzero_si512 (),
8569 (__mmask8) __U);
8570}
8571#else
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined: same builtin call as the
   inline version, with an undefined fourth operand and an all-ones mask.  */
#define _mm512_alignr_epi32(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignd512_mask (                             \
      (__v16si)(__m512i)(X),                                            \
      (__v16si)(__m512i)(Y),                                            \
      (int)(C),                                                         \
      (__v16si)_mm512_undefined_epi32 (),                               \
      (__mmask16)-1))
8576
/* Macro form used when __OPTIMIZE__ is not defined: W is the fourth operand
   and U the mask of the alignd512 builtin.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignd512_mask (                             \
      (__v16si)(__m512i)(X),                                            \
      (__v16si)(__m512i)(Y),                                            \
      (int)(C),                                                         \
      (__v16si)(__m512i)(W),                                            \
      (__mmask16)(U)))
8581
/* Macro form used when __OPTIMIZE__ is not defined: a zeroed vector is the
   fourth operand and U the mask of the alignd512 builtin.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignd512_mask (                             \
      (__v16si)(__m512i)(X),                                            \
      (__v16si)(__m512i)(Y),                                            \
      (int)(C),                                                         \
      (__v16si)_mm512_setzero_si512 (),                                 \
      (__mmask16)(U)))
8586
/* Macro form used when __OPTIMIZE__ is not defined: same builtin call as the
   inline version, with an undefined fourth operand and an all-ones mask.  */
#define _mm512_alignr_epi64(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignq512_mask (                             \
      (__v8di)(__m512i)(X),                                             \
      (__v8di)(__m512i)(Y),                                             \
      (int)(C),                                                         \
      (__v8di)_mm512_undefined_epi32 (),                                \
      (__mmask8)-1))
756c5857
AI
8591
/* Macro form used when __OPTIMIZE__ is not defined: W is the fourth operand
   and U the mask of the alignq512 builtin.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignq512_mask (                             \
      (__v8di)(__m512i)(X),                                             \
      (__v8di)(__m512i)(Y),                                             \
      (int)(C),                                                         \
      (__v8di)(__m512i)(W),                                             \
      (__mmask8)(U)))
8595
/* Macro form used when __OPTIMIZE__ is not defined: a zeroed vector is the
   fourth operand and U the mask of the alignq512 builtin.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignq512_mask (                             \
      (__v8di)(__m512i)(X),                                             \
      (__v8di)(__m512i)(Y),                                             \
      (int)(C),                                                         \
      (__v8di)_mm512_setzero_si512 (),                                  \
      (__mmask8)(U)))
8600#endif
8601
8602extern __inline __mmask16
8603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8605{
8606 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8607 (__v16si) __B,
8608 (__mmask16) -1);
8609}
8610
8611extern __inline __mmask16
8612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8613_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8614{
8615 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8616 (__v16si) __B, __U);
8617}
8618
8619extern __inline __mmask8
8620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8622{
8623 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8624 (__v8di) __B, __U);
8625}
8626
8627extern __inline __mmask8
8628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8629_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8630{
8631 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8632 (__v8di) __B,
8633 (__mmask8) -1);
8634}
8635
8636extern __inline __mmask16
8637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8639{
8640 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8641 (__v16si) __B,
8642 (__mmask16) -1);
8643}
8644
8645extern __inline __mmask16
8646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8647_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8648{
8649 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8650 (__v16si) __B, __U);
8651}
8652
8653extern __inline __mmask8
8654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8655_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8656{
8657 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8658 (__v8di) __B, __U);
8659}
8660
8661extern __inline __mmask8
8662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8664{
8665 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8666 (__v8di) __B,
8667 (__mmask8) -1);
8668}
8669
d256b866
IT
8670extern __inline __mmask16
8671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8673{
8674 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8675 (__v16si) __Y, 5,
8676 (__mmask16) -1);
8677}
8678
275be1da
IT
8679extern __inline __mmask16
8680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8682{
8683 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8684 (__v16si) __Y, 5,
8685 (__mmask16) __M);
8686}
8687
8688extern __inline __mmask16
8689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8690_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8691{
8692 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8693 (__v16si) __Y, 5,
8694 (__mmask16) __M);
8695}
8696
d256b866
IT
8697extern __inline __mmask16
8698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8699_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8700{
8701 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8702 (__v16si) __Y, 5,
8703 (__mmask16) -1);
8704}
8705
275be1da
IT
8706extern __inline __mmask8
8707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8708_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8709{
8710 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8711 (__v8di) __Y, 5,
8712 (__mmask8) __M);
8713}
8714
d256b866
IT
8715extern __inline __mmask8
8716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8718{
8719 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8720 (__v8di) __Y, 5,
8721 (__mmask8) -1);
8722}
8723
275be1da
IT
8724extern __inline __mmask8
8725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8726_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8727{
8728 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8729 (__v8di) __Y, 5,
8730 (__mmask8) __M);
8731}
8732
d256b866
IT
8733extern __inline __mmask8
8734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8735_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8736{
8737 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8738 (__v8di) __Y, 5,
8739 (__mmask8) -1);
8740}
8741
275be1da
IT
8742extern __inline __mmask16
8743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8744_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8745{
8746 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8747 (__v16si) __Y, 2,
8748 (__mmask16) __M);
8749}
8750
d256b866
IT
8751extern __inline __mmask16
8752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8753_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8754{
8755 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8756 (__v16si) __Y, 2,
8757 (__mmask16) -1);
8758}
8759
275be1da
IT
8760extern __inline __mmask16
8761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8763{
8764 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8765 (__v16si) __Y, 2,
8766 (__mmask16) __M);
8767}
8768
d256b866
IT
8769extern __inline __mmask16
8770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8771_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8772{
8773 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8774 (__v16si) __Y, 2,
8775 (__mmask16) -1);
8776}
8777
275be1da
IT
8778extern __inline __mmask8
8779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8781{
8782 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8783 (__v8di) __Y, 2,
8784 (__mmask8) __M);
8785}
8786
d256b866
IT
8787extern __inline __mmask8
8788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8790{
8791 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8792 (__v8di) __Y, 2,
8793 (__mmask8) -1);
8794}
8795
275be1da
IT
8796extern __inline __mmask8
8797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8799{
8800 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8801 (__v8di) __Y, 2,
8802 (__mmask8) __M);
8803}
8804
d256b866
IT
8805extern __inline __mmask8
8806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8808{
8809 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8810 (__v8di) __Y, 2,
8811 (__mmask8) -1);
8812}
8813
275be1da
IT
8814extern __inline __mmask16
8815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8817{
8818 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8819 (__v16si) __Y, 1,
8820 (__mmask16) __M);
8821}
8822
d256b866
IT
8823extern __inline __mmask16
8824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8825_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8826{
8827 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8828 (__v16si) __Y, 1,
8829 (__mmask16) -1);
8830}
8831
275be1da
IT
8832extern __inline __mmask16
8833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8834_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8835{
8836 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8837 (__v16si) __Y, 1,
8838 (__mmask16) __M);
8839}
8840
d256b866
IT
8841extern __inline __mmask16
8842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8843_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8844{
8845 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8846 (__v16si) __Y, 1,
8847 (__mmask16) -1);
8848}
8849
275be1da
IT
8850extern __inline __mmask8
8851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8852_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8853{
8854 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8855 (__v8di) __Y, 1,
8856 (__mmask8) __M);
8857}
8858
d256b866
IT
8859extern __inline __mmask8
8860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8861_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8862{
8863 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8864 (__v8di) __Y, 1,
8865 (__mmask8) -1);
8866}
8867
275be1da
IT
8868extern __inline __mmask8
8869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8870_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8871{
8872 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8873 (__v8di) __Y, 1,
8874 (__mmask8) __M);
8875}
8876
d256b866
IT
8877extern __inline __mmask8
8878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8880{
8881 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8882 (__v8di) __Y, 1,
8883 (__mmask8) -1);
8884}
8885
8886extern __inline __mmask16
8887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8889{
8890 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8891 (__v16si) __Y, 4,
8892 (__mmask16) -1);
8893}
8894
275be1da
IT
8895extern __inline __mmask16
8896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8897_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8898{
8899 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8900 (__v16si) __Y, 4,
8901 (__mmask16) __M);
8902}
8903
8904extern __inline __mmask16
8905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8906_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8907{
8908 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8909 (__v16si) __Y, 4,
8910 (__mmask16) __M);
8911}
8912
d256b866
IT
8913extern __inline __mmask16
8914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8915_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8916{
8917 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8918 (__v16si) __Y, 4,
8919 (__mmask16) -1);
8920}
8921
275be1da
IT
8922extern __inline __mmask8
8923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8925{
8926 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8927 (__v8di) __Y, 4,
8928 (__mmask8) __M);
8929}
8930
d256b866
IT
8931extern __inline __mmask8
8932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8933_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8934{
8935 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8936 (__v8di) __Y, 4,
8937 (__mmask8) -1);
8938}
8939
275be1da
IT
8940extern __inline __mmask8
8941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8942_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8943{
8944 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8945 (__v8di) __Y, 4,
8946 (__mmask8) __M);
8947}
8948
d256b866
IT
8949extern __inline __mmask8
8950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8952{
8953 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8954 (__v8di) __Y, 4,
8955 (__mmask8) -1);
8956}
8957
756c5857
AI
/* Integer comparison predicate immediates.  GE and GT are alternative
   spellings of NLT and NLE respectively and share their values.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       _MM_CMPINT_NLT
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       _MM_CMPINT_NLE
8967
8968#ifdef __OPTIMIZE__
8969extern __inline __mmask8
8970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8971_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8972{
8973 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8974 (__v8di) __Y, __P,
8975 (__mmask8) -1);
8976}
8977
8978extern __inline __mmask16
8979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8981{
8982 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8983 (__v16si) __Y, __P,
8984 (__mmask16) -1);
8985}
8986
8987extern __inline __mmask8
8988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8990{
8991 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8992 (__v8di) __Y, __P,
8993 (__mmask8) -1);
8994}
8995
8996extern __inline __mmask16
8997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8998_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
8999{
9000 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9001 (__v16si) __Y, __P,
9002 (__mmask16) -1);
9003}
9004
9005extern __inline __mmask8
9006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9007_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9008 const int __R)
9009{
9010 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9011 (__v8df) __Y, __P,
9012 (__mmask8) -1, __R);
9013}
9014
9015extern __inline __mmask16
9016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9018{
9019 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9020 (__v16sf) __Y, __P,
9021 (__mmask16) -1, __R);
9022}
9023
9024extern __inline __mmask8
9025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9027 const int __P)
9028{
9029 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9030 (__v8di) __Y, __P,
9031 (__mmask8) __U);
9032}
9033
9034extern __inline __mmask16
9035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9037 const int __P)
9038{
9039 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9040 (__v16si) __Y, __P,
9041 (__mmask16) __U);
9042}
9043
9044extern __inline __mmask8
9045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9046_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9047 const int __P)
9048{
9049 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9050 (__v8di) __Y, __P,
9051 (__mmask8) __U);
9052}
9053
9054extern __inline __mmask16
9055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9056_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9057 const int __P)
9058{
9059 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9060 (__v16si) __Y, __P,
9061 (__mmask16) __U);
9062}
9063
9064extern __inline __mmask8
9065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9067 const int __P, const int __R)
9068{
9069 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9070 (__v8df) __Y, __P,
9071 (__mmask8) __U, __R);
9072}
9073
9074extern __inline __mmask16
9075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9077 const int __P, const int __R)
9078{
9079 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9080 (__v16sf) __Y, __P,
9081 (__mmask16) __U, __R);
9082}
9083
9084extern __inline __mmask8
9085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9087{
9088 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9089 (__v2df) __Y, __P,
9090 (__mmask8) -1, __R);
9091}
9092
9093extern __inline __mmask8
9094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9095_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9096 const int __P, const int __R)
9097{
9098 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9099 (__v2df) __Y, __P,
9100 (__mmask8) __M, __R);
9101}
9102
9103extern __inline __mmask8
9104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9105_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9106{
9107 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9108 (__v4sf) __Y, __P,
9109 (__mmask8) -1, __R);
9110}
9111
9112extern __inline __mmask8
9113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9114_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9115 const int __P, const int __R)
9116{
9117 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9118 (__v4sf) __Y, __P,
9119 (__mmask8) __M, __R);
9120}
9121
9122#else
/* Macro form used when __OPTIMIZE__ is not defined: cmpq512 builtin with
   predicate P and an all-ones input mask.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask (                             \
      (__v8di)(__m512i)(X),                                             \
      (__v8di)(__m512i)(Y),                                             \
      (int)(P),                                                         \
      (__mmask8)-1))
9127
/* Macro form used when __OPTIMIZE__ is not defined: cmpd512 builtin with
   predicate P and an all-ones input mask.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask (                            \
      (__v16si)(__m512i)(X),                                            \
      (__v16si)(__m512i)(Y),                                            \
      (int)(P),                                                         \
      (__mmask16)-1))
756c5857
AI
9132
/* Macro form used when __OPTIMIZE__ is not defined: ucmpq512 builtin with
   predicate P and an all-ones input mask.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask (                            \
      (__v8di)(__m512i)(X),                                             \
      (__v8di)(__m512i)(Y),                                             \
      (int)(P),                                                         \
      (__mmask8)-1))
9137
/* Macro form used when __OPTIMIZE__ is not defined: ucmpd512 builtin with
   predicate P and an all-ones input mask.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask (                           \
      (__v16si)(__m512i)(X),                                            \
      (__v16si)(__m512i)(Y),                                            \
      (int)(P),                                                         \
      (__mmask16)-1))
756c5857 9142
/* Macro form used when __OPTIMIZE__ is not defined: cmppd512 builtin with
   predicate P, an all-ones input mask, and R forwarded as the final
   argument.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask (                            \
      (__v8df)(__m512d)(X),                                             \
      (__v8df)(__m512d)(Y),                                             \
      (int)(P),                                                         \
      (__mmask8)-1, R))
9147
/* Macro form used when __OPTIMIZE__ is not defined: cmpps512 builtin with
   predicate P, an all-ones input mask, and R forwarded as the final
   argument.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask (                           \
      (__v16sf)(__m512)(X),                                             \
      (__v16sf)(__m512)(Y),                                             \
      (int)(P),                                                         \
      (__mmask16)-1, R))
9152
/* Macro form used when __OPTIMIZE__ is not defined: cmpq512 builtin with
   predicate P and M as the input mask.  M is parenthesized before the cast
   so that a compound argument (e.g. `c ? a : b') is converted as a whole
   rather than only its first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))
9157
383321ec
UB
/* Macro form used when __OPTIMIZE__ is not defined: cmpd512 builtin with
   predicate P and M as the input mask.  M is parenthesized before the cast
   so that a compound argument (e.g. `c ? a : b') is converted as a whole
   rather than only its first operand.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))
756c5857 9162
/* Macro form used when __OPTIMIZE__ is not defined: ucmpq512 builtin with
   predicate P and M as the input mask.  M is parenthesized before the cast
   so that a compound argument (e.g. `c ? a : b') is converted as a whole
   rather than only its first operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))
9167
383321ec
UB
/* Macro form used when __OPTIMIZE__ is not defined: ucmpd512 builtin with
   predicate P and M as the input mask.  M is parenthesized before the cast
   so that a compound argument (e.g. `c ? a : b') is converted as a whole
   rather than only its first operand.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))
756c5857 9172
/* Macro form used when __OPTIMIZE__ is not defined: cmppd512 builtin with
   predicate P, M as the input mask, and R forwarded as the final argument.
   M is parenthesized before the cast so that a compound argument (e.g.
   `c ? a : b') is converted as a whole rather than only its first
   operand.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), R))
9177
/* Macro form used when __OPTIMIZE__ is not defined: cmpps512 builtin with
   predicate P, M as the input mask, and R forwarded as the final argument.
   M is parenthesized before the cast so that a compound argument (e.g.
   `c ? a : b') is converted as a whole rather than only its first
   operand.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), R))
9182
/* Macro form used when __OPTIMIZE__ is not defined: cmpsd_mask builtin with
   predicate P, an all-ones input mask, and R forwarded as the final
   argument.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpsd_mask (                               \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (int)(P),                                                         \
      (__mmask8)-1, R))
9187
/* Macro form used when __OPTIMIZE__ is not defined: cmpsd_mask builtin with
   predicate P, M passed through as the input mask, and R forwarded as the
   final argument.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask (                               \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (int)(P),                                                         \
      (M), R))
9192
/* Macro form used when __OPTIMIZE__ is not defined: cmpss_mask builtin with
   predicate P, an all-ones input mask, and R forwarded as the final
   argument.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpss_mask (                               \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (int)(P),                                                         \
      (__mmask8)-1, R))
9197
/* Macro form used when __OPTIMIZE__ is not defined: cmpss_mask builtin with
   predicate P, M passed through as the input mask, and R forwarded as the
   final argument.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask (                               \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (int)(P),                                                         \
      (M), R))
9202#endif
9203
9204#ifdef __OPTIMIZE__
9205extern __inline __m512
9206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9207_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9208{
b5fd0b71
JJ
9209 __m512 __v1_old = _mm512_undefined_ps ();
9210 __mmask16 __mask = 0xFFFF;
756c5857 9211
b5fd0b71 9212 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9213 __addr,
9214 (__v16si) __index,
b5fd0b71 9215 __mask, __scale);
756c5857
AI
9216}
9217
9218extern __inline __m512
9219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 9220_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
756c5857
AI
9221 __m512i __index, float const *__addr, int __scale)
9222{
b5fd0b71 9223 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9224 __addr,
9225 (__v16si) __index,
9226 __mask, __scale);
9227}
9228
9229extern __inline __m512d
9230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9231_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9232{
b5fd0b71
JJ
9233 __m512d __v1_old = _mm512_undefined_pd ();
9234 __mmask8 __mask = 0xFF;
756c5857 9235
b5fd0b71 9236 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 9237 __addr,
b5fd0b71 9238 (__v8si) __index, __mask,
756c5857
AI
9239 __scale);
9240}
9241
9242extern __inline __m512d
9243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9245 __m256i __index, double const *__addr, int __scale)
9246{
9247 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9248 __addr,
9249 (__v8si) __index,
9250 __mask, __scale);
9251}
9252
9253extern __inline __m256
9254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9255_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9256{
b5fd0b71
JJ
9257 __m256 __v1_old = _mm256_undefined_ps ();
9258 __mmask8 __mask = 0xFF;
756c5857 9259
b5fd0b71 9260 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 9261 __addr,
b5fd0b71 9262 (__v8di) __index, __mask,
756c5857
AI
9263 __scale);
9264}
9265
9266extern __inline __m256
9267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9268_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9269 __m512i __index, float const *__addr, int __scale)
9270{
9271 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9272 __addr,
9273 (__v8di) __index,
9274 __mask, __scale);
9275}
9276
9277extern __inline __m512d
9278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9279_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9280{
b5fd0b71
JJ
9281 __m512d __v1_old = _mm512_undefined_pd ();
9282 __mmask8 __mask = 0xFF;
756c5857 9283
b5fd0b71 9284 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 9285 __addr,
b5fd0b71 9286 (__v8di) __index, __mask,
756c5857
AI
9287 __scale);
9288}
9289
9290extern __inline __m512d
9291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9293 __m512i __index, double const *__addr, int __scale)
9294{
9295 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9296 __addr,
9297 (__v8di) __index,
9298 __mask, __scale);
9299}
9300
9301extern __inline __m512i
9302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9303_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9304{
b5fd0b71
JJ
9305 __m512i __v1_old = _mm512_undefined_epi32 ();
9306 __mmask16 __mask = 0xFFFF;
756c5857 9307
b5fd0b71 9308 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
9309 __addr,
9310 (__v16si) __index,
b5fd0b71 9311 __mask, __scale);
756c5857
AI
9312}
9313
9314extern __inline __m512i
9315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9316_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9317 __m512i __index, int const *__addr, int __scale)
9318{
9319 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9320 __addr,
9321 (__v16si) __index,
9322 __mask, __scale);
9323}
9324
9325extern __inline __m512i
9326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9328{
b5fd0b71
JJ
9329 __m512i __v1_old = _mm512_undefined_epi32 ();
9330 __mmask8 __mask = 0xFF;
756c5857 9331
b5fd0b71 9332 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 9333 __addr,
b5fd0b71 9334 (__v8si) __index, __mask,
756c5857
AI
9335 __scale);
9336}
9337
9338extern __inline __m512i
9339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9341 __m256i __index, long long const *__addr,
9342 int __scale)
9343{
9344 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9345 __addr,
9346 (__v8si) __index,
9347 __mask, __scale);
9348}
9349
9350extern __inline __m256i
9351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9352_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9353{
b5fd0b71
JJ
9354 __m256i __v1_old = _mm256_undefined_si256 ();
9355 __mmask8 __mask = 0xFF;
756c5857 9356
b5fd0b71 9357 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
9358 __addr,
9359 (__v8di) __index,
b5fd0b71 9360 __mask, __scale);
756c5857
AI
9361}
9362
9363extern __inline __m256i
9364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9365_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9366 __m512i __index, int const *__addr, int __scale)
9367{
9368 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9369 __addr,
9370 (__v8di) __index,
9371 __mask, __scale);
9372}
9373
9374extern __inline __m512i
9375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9377{
b5fd0b71
JJ
9378 __m512i __v1_old = _mm512_undefined_epi32 ();
9379 __mmask8 __mask = 0xFF;
756c5857 9380
b5fd0b71 9381 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 9382 __addr,
b5fd0b71 9383 (__v8di) __index, __mask,
756c5857
AI
9384 __scale);
9385}
9386
9387extern __inline __m512i
9388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9389_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9390 __m512i __index, long long const *__addr,
9391 int __scale)
9392{
9393 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9394 __addr,
9395 (__v8di) __index,
9396 __mask, __scale);
9397}
9398
9399extern __inline void
9400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9402{
9403 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9404 (__v16si) __index, (__v16sf) __v1, __scale);
9405}
9406
9407extern __inline void
9408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9410 __m512i __index, __m512 __v1, int __scale)
9411{
9412 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9413 (__v16sf) __v1, __scale);
9414}
9415
9416extern __inline void
9417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9419 int __scale)
9420{
9421 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9422 (__v8si) __index, (__v8df) __v1, __scale);
9423}
9424
9425extern __inline void
9426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9428 __m256i __index, __m512d __v1, int __scale)
9429{
9430 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9431 (__v8df) __v1, __scale);
9432}
9433
9434extern __inline void
9435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9437{
9438 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9439 (__v8di) __index, (__v8sf) __v1, __scale);
9440}
9441
9442extern __inline void
9443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9444_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9445 __m512i __index, __m256 __v1, int __scale)
9446{
9447 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9448 (__v8sf) __v1, __scale);
9449}
9450
9451extern __inline void
9452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9453_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9454 int __scale)
9455{
9456 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9457 (__v8di) __index, (__v8df) __v1, __scale);
9458}
9459
9460extern __inline void
9461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9463 __m512i __index, __m512d __v1, int __scale)
9464{
9465 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9466 (__v8df) __v1, __scale);
9467}
9468
9469extern __inline void
9470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9471_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9472 __m512i __v1, int __scale)
9473{
9474 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9475 (__v16si) __index, (__v16si) __v1, __scale);
9476}
9477
9478extern __inline void
9479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9480_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9481 __m512i __index, __m512i __v1, int __scale)
9482{
9483 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9484 (__v16si) __v1, __scale);
9485}
9486
9487extern __inline void
9488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9490 __m512i __v1, int __scale)
9491{
9492 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9493 (__v8si) __index, (__v8di) __v1, __scale);
9494}
9495
9496extern __inline void
9497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9498_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9499 __m256i __index, __m512i __v1, int __scale)
9500{
9501 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9502 (__v8di) __v1, __scale);
9503}
9504
9505extern __inline void
9506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9507_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9508 __m256i __v1, int __scale)
9509{
9510 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9511 (__v8di) __index, (__v8si) __v1, __scale);
9512}
9513
9514extern __inline void
9515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9516_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9517 __m512i __index, __m256i __v1, int __scale)
9518{
9519 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9520 (__v8si) __v1, __scale);
9521}
9522
9523extern __inline void
9524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9525_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9526 __m512i __v1, int __scale)
9527{
9528 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9529 (__v8di) __index, (__v8di) __v1, __scale);
9530}
9531
9532extern __inline void
9533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9534_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9535 __m512i __index, __m512i __v1, int __scale)
9536{
9537 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9538 (__v8di) __v1, __scale);
9539}
9540#else
/* Macro form of _mm512_i32gather_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
                                          (float const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16)0xFFFF, (int) (SCALE)))
9546
/* Macro form of _mm512_mask_i32gather_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))
9552
/* Macro form of _mm512_i32gather_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (double const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9558
/* Macro form of _mm512_mask_i32gather_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (double const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9564
/* Macro form of _mm512_i64gather_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
                                          (float const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9570
/* Macro form of _mm512_mask_i64gather_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9576
/* Macro form of _mm512_i64gather_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (double const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9582
/* Macro form of _mm512_mask_i64gather_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (double const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9588
/* Macro form of _mm512_i32gather_epi32.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
                                           (int const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16)0xFFFF, (int) (SCALE)))
9594
/* Macro form of _mm512_mask_i32gather_epi32.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), (int) (SCALE)))
9600
/* Macro form of _mm512_i32gather_epi64.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (long long const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9606
/* Macro form of _mm512_mask_i32gather_epi64.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (long long const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9612
/* Macro form of _mm512_i64gather_epi32.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
                                           (int const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8)0xFF, (int) (SCALE)))
9618
/* Macro form of _mm512_mask_i64gather_epi32.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), (int) (SCALE)))
9624
/* Macro form of _mm512_i64gather_epi64.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole; the expansion is parenthesized as well.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (long long const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9630
/* Macro form of _mm512_mask_i64gather_epi64.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole; the expansion is parenthesized as well.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (long long const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9636
/* Macro form of _mm512_i32scatter_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9641
/* Macro form of _mm512_mask_i32scatter_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9646
/* Macro form of _mm512_i32scatter_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9651
/* Macro form of _mm512_mask_i32scatter_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9656
/* Macro form of _mm512_i64scatter_ps.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9661
/* Macro form of _mm512_mask_i64scatter_ps.  MASK is cast to __mmask8 to
   agree with the inline-function form, which declares its mask parameter
   as __mmask8.  Each argument is parenthesized before it is cast so an
   expression argument (e.g. `p + 1') is converted as a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9666
/* Macro form of _mm512_i64scatter_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9671
/* Macro form of _mm512_mask_i64scatter_pd.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9676
/* Macro form of _mm512_i32scatter_epi32.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9681
/* Macro form of _mm512_mask_i32scatter_epi32.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9686
/* Macro form of _mm512_i32scatter_epi64.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9691
/* Macro form of _mm512_mask_i32scatter_epi64.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9696
/* Macro form of _mm512_i64scatter_epi32.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9701
/* Macro form of _mm512_mask_i64scatter_epi32.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9706
/* Macro form of _mm512_i64scatter_epi64.  Each argument is parenthesized
   before it is cast so an expression argument (e.g. `p + 1') is converted
   as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9711
/* Macro form of _mm512_mask_i64scatter_epi64.  Each argument is
   parenthesized before it is cast so an expression argument (e.g. `p + 1')
   is converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9716#endif
9717
9718extern __inline __m512d
9719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9720_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9721{
9722 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9723 (__v8df) __W,
9724 (__mmask8) __U);
9725}
9726
9727extern __inline __m512d
9728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9730{
9731 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9732 (__v8df)
9733 _mm512_setzero_pd (),
9734 (__mmask8) __U);
9735}
9736
9737extern __inline void
9738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9740{
9741 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9742 (__mmask8) __U);
9743}
9744
9745extern __inline __m512
9746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9747_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9748{
9749 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9750 (__v16sf) __W,
9751 (__mmask16) __U);
9752}
9753
9754extern __inline __m512
9755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9756_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9757{
9758 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9759 (__v16sf)
9760 _mm512_setzero_ps (),
9761 (__mmask16) __U);
9762}
9763
9764extern __inline void
9765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9766_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9767{
9768 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9769 (__mmask16) __U);
9770}
9771
9772extern __inline __m512i
9773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9775{
9776 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9777 (__v8di) __W,
9778 (__mmask8) __U);
9779}
9780
9781extern __inline __m512i
9782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9783_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9784{
9785 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9786 (__v8di)
9787 _mm512_setzero_si512 (),
9788 (__mmask8) __U);
9789}
9790
9791extern __inline void
9792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9793_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9794{
9795 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9796 (__mmask8) __U);
9797}
9798
9799extern __inline __m512i
9800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9801_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9802{
9803 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9804 (__v16si) __W,
9805 (__mmask16) __U);
9806}
9807
9808extern __inline __m512i
9809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9811{
9812 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9813 (__v16si)
9814 _mm512_setzero_si512 (),
9815 (__mmask16) __U);
9816}
9817
9818extern __inline void
9819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9821{
9822 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9823 (__mmask16) __U);
9824}
9825
9826extern __inline __m512d
9827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9828_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9829{
9830 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9831 (__v8df) __W,
9832 (__mmask8) __U);
9833}
9834
9835extern __inline __m512d
9836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9838{
9839 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9840 (__v8df)
9841 _mm512_setzero_pd (),
9842 (__mmask8) __U);
9843}
9844
9845extern __inline __m512d
9846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9847_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9848{
9849 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9850 (__v8df) __W,
9851 (__mmask8) __U);
9852}
9853
9854extern __inline __m512d
9855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9857{
9858 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9859 (__v8df)
9860 _mm512_setzero_pd (),
9861 (__mmask8) __U);
9862}
9863
9864extern __inline __m512
9865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9866_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9867{
9868 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9869 (__v16sf) __W,
9870 (__mmask16) __U);
9871}
9872
9873extern __inline __m512
9874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9875_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9876{
9877 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9878 (__v16sf)
9879 _mm512_setzero_ps (),
9880 (__mmask16) __U);
9881}
9882
9883extern __inline __m512
9884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9886{
9887 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9888 (__v16sf) __W,
9889 (__mmask16) __U);
9890}
9891
9892extern __inline __m512
9893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9894_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9895{
9896 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9897 (__v16sf)
9898 _mm512_setzero_ps (),
9899 (__mmask16) __U);
9900}
9901
9902extern __inline __m512i
9903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9904_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9905{
9906 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9907 (__v8di) __W,
9908 (__mmask8) __U);
9909}
9910
9911extern __inline __m512i
9912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9913_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9914{
9915 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9916 (__v8di)
9917 _mm512_setzero_si512 (),
9918 (__mmask8) __U);
9919}
9920
9921extern __inline __m512i
9922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9923_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9924{
9925 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9926 (__v8di) __W,
9927 (__mmask8) __U);
9928}
9929
9930extern __inline __m512i
9931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9932_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9933{
9934 return (__m512i)
9935 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9936 (__v8di)
9937 _mm512_setzero_si512 (),
9938 (__mmask8) __U);
9939}
9940
9941extern __inline __m512i
9942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9943_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9944{
9945 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9946 (__v16si) __W,
9947 (__mmask16) __U);
9948}
9949
9950extern __inline __m512i
9951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9952_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9953{
9954 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9955 (__v16si)
9956 _mm512_setzero_si512 (),
9957 (__mmask16) __U);
9958}
9959
9960extern __inline __m512i
9961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9962_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9963{
9964 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9965 (__v16si) __W,
9966 (__mmask16) __U);
9967}
9968
9969extern __inline __m512i
9970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9972{
9973 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9974 (__v16si)
9975 _mm512_setzero_si512
9976 (), (__mmask16) __U);
9977}
9978
9979/* Mask arithmetic operations */
6901ea62
AS
/* The _k*_mask16 spellings are plain aliases that expand to the
   corresponding _mm512_k* names.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
9986
756c5857
AI
9987extern __inline __mmask16
9988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9989_mm512_kand (__mmask16 __A, __mmask16 __B)
9990{
9991 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9992}
9993
9994extern __inline __mmask16
9995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9996_mm512_kandn (__mmask16 __A, __mmask16 __B)
9997{
6901ea62
AS
9998 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
9999 (__mmask16) __B);
756c5857
AI
10000}
10001
10002extern __inline __mmask16
10003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004_mm512_kor (__mmask16 __A, __mmask16 __B)
10005{
10006 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10007}
10008
10009extern __inline int
10010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10011_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10012{
10013 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10014 (__mmask16) __B);
10015}
10016
10017extern __inline int
10018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10019_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10020{
10021 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10022 (__mmask16) __B);
10023}
10024
10025extern __inline __mmask16
10026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10027_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10028{
10029 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10030}
10031
10032extern __inline __mmask16
10033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034_mm512_kxor (__mmask16 __A, __mmask16 __B)
10035{
10036 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10037}
10038
10039extern __inline __mmask16
10040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10041_mm512_knot (__mmask16 __A)
10042{
10043 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10044}
10045
10046extern __inline __mmask16
10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10049{
10050 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10051}
10052
6901ea62
AS
10053extern __inline __mmask16
10054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10055_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10056{
10057 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10058}
10059
756c5857
AI
10060#ifdef __OPTIMIZE__
10061extern __inline __m512i
10062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10063_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10064 const int __imm)
10065{
10066 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10067 (__v4si) __D,
10068 __imm,
10069 (__v16si)
10070 _mm512_setzero_si512 (),
10071 __B);
10072}
10073
10074extern __inline __m512
10075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10077 const int __imm)
10078{
10079 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10080 (__v4sf) __D,
10081 __imm,
10082 (__v16sf)
10083 _mm512_setzero_ps (), __B);
10084}
10085
10086extern __inline __m512i
10087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10088_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10089 __m128i __D, const int __imm)
10090{
10091 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10092 (__v4si) __D,
10093 __imm,
10094 (__v16si) __A,
10095 __B);
10096}
10097
10098extern __inline __m512
10099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10100_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10101 __m128 __D, const int __imm)
10102{
10103 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10104 (__v4sf) __D,
10105 __imm,
10106 (__v16sf) __A, __B);
10107}
10108#else
/* Macro form of _mm512_maskz_insertf32x4 used when __OPTIMIZE__ is not
   defined.  The mask argument A is cast to __mmask16, the mask type the
   inline-function form declares; a narrower __mmask8 cast would discard
   mask bits 8..15.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))
10113
/* Macro form of _mm512_maskz_inserti32x4 used when __OPTIMIZE__ is not
   defined.  The mask argument A is cast to __mmask16, the mask type the
   inline-function form declares; a narrower __mmask8 cast would discard
   mask bits 8..15.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10118
/* Macro form of _mm512_mask_insertf32x4 used when __OPTIMIZE__ is not
   defined.  The mask argument B is cast to __mmask16, the mask type the
   inline-function form declares; a narrower __mmask8 cast would discard
   mask bits 8..15.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))
10123
/* Macro form of _mm512_mask_inserti32x4 used when __OPTIMIZE__ is not
   defined.  The mask argument B is cast to __mmask16, the mask type the
   inline-function form declares; a narrower __mmask8 cast would discard
   mask bits 8..15.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10128#endif
10129
10130extern __inline __m512i
10131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132_mm512_max_epi64 (__m512i __A, __m512i __B)
10133{
10134 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10135 (__v8di) __B,
10136 (__v8di)
4271e5cb 10137 _mm512_undefined_epi32 (),
756c5857
AI
10138 (__mmask8) -1);
10139}
10140
10141extern __inline __m512i
10142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10144{
10145 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10146 (__v8di) __B,
10147 (__v8di)
10148 _mm512_setzero_si512 (),
10149 __M);
10150}
10151
10152extern __inline __m512i
10153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10154_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10155{
10156 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10157 (__v8di) __B,
10158 (__v8di) __W, __M);
10159}
10160
10161extern __inline __m512i
10162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10163_mm512_min_epi64 (__m512i __A, __m512i __B)
10164{
10165 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10166 (__v8di) __B,
10167 (__v8di)
4271e5cb 10168 _mm512_undefined_epi32 (),
756c5857
AI
10169 (__mmask8) -1);
10170}
10171
10172extern __inline __m512i
10173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10174_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10175{
10176 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10177 (__v8di) __B,
10178 (__v8di) __W, __M);
10179}
10180
10181extern __inline __m512i
10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10184{
10185 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10186 (__v8di) __B,
10187 (__v8di)
10188 _mm512_setzero_si512 (),
10189 __M);
10190}
10191
10192extern __inline __m512i
10193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194_mm512_max_epu64 (__m512i __A, __m512i __B)
10195{
10196 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10197 (__v8di) __B,
10198 (__v8di)
4271e5cb 10199 _mm512_undefined_epi32 (),
756c5857
AI
10200 (__mmask8) -1);
10201}
10202
10203extern __inline __m512i
10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10206{
10207 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10208 (__v8di) __B,
10209 (__v8di)
10210 _mm512_setzero_si512 (),
10211 __M);
10212}
10213
10214extern __inline __m512i
10215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10217{
10218 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10219 (__v8di) __B,
10220 (__v8di) __W, __M);
10221}
10222
10223extern __inline __m512i
10224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10225_mm512_min_epu64 (__m512i __A, __m512i __B)
10226{
10227 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10228 (__v8di) __B,
10229 (__v8di)
4271e5cb 10230 _mm512_undefined_epi32 (),
756c5857
AI
10231 (__mmask8) -1);
10232}
10233
10234extern __inline __m512i
10235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10236_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10237{
10238 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10239 (__v8di) __B,
10240 (__v8di) __W, __M);
10241}
10242
10243extern __inline __m512i
10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10246{
10247 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10248 (__v8di) __B,
10249 (__v8di)
10250 _mm512_setzero_si512 (),
10251 __M);
10252}
10253
10254extern __inline __m512i
10255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10256_mm512_max_epi32 (__m512i __A, __m512i __B)
10257{
10258 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10259 (__v16si) __B,
10260 (__v16si)
4271e5cb 10261 _mm512_undefined_epi32 (),
756c5857
AI
10262 (__mmask16) -1);
10263}
10264
10265extern __inline __m512i
10266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10267_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10268{
10269 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10270 (__v16si) __B,
10271 (__v16si)
10272 _mm512_setzero_si512 (),
10273 __M);
10274}
10275
10276extern __inline __m512i
10277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10279{
10280 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10281 (__v16si) __B,
10282 (__v16si) __W, __M);
10283}
10284
10285extern __inline __m512i
10286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10287_mm512_min_epi32 (__m512i __A, __m512i __B)
10288{
10289 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10290 (__v16si) __B,
10291 (__v16si)
4271e5cb 10292 _mm512_undefined_epi32 (),
756c5857
AI
10293 (__mmask16) -1);
10294}
10295
10296extern __inline __m512i
10297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10299{
10300 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10301 (__v16si) __B,
10302 (__v16si)
10303 _mm512_setzero_si512 (),
10304 __M);
10305}
10306
10307extern __inline __m512i
10308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10309_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10310{
10311 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10312 (__v16si) __B,
10313 (__v16si) __W, __M);
10314}
10315
10316extern __inline __m512i
10317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318_mm512_max_epu32 (__m512i __A, __m512i __B)
10319{
10320 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10321 (__v16si) __B,
10322 (__v16si)
4271e5cb 10323 _mm512_undefined_epi32 (),
756c5857
AI
10324 (__mmask16) -1);
10325}
10326
10327extern __inline __m512i
10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10330{
10331 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10332 (__v16si) __B,
10333 (__v16si)
10334 _mm512_setzero_si512 (),
10335 __M);
10336}
10337
10338extern __inline __m512i
10339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10340_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10341{
10342 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10343 (__v16si) __B,
10344 (__v16si) __W, __M);
10345}
10346
10347extern __inline __m512i
10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349_mm512_min_epu32 (__m512i __A, __m512i __B)
10350{
10351 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10352 (__v16si) __B,
10353 (__v16si)
4271e5cb 10354 _mm512_undefined_epi32 (),
756c5857
AI
10355 (__mmask16) -1);
10356}
10357
10358extern __inline __m512i
10359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10360_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10361{
10362 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10363 (__v16si) __B,
10364 (__v16si)
10365 _mm512_setzero_si512 (),
10366 __M);
10367}
10368
10369extern __inline __m512i
10370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10371_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10372{
10373 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10374 (__v16si) __B,
10375 (__v16si) __W, __M);
10376}
10377
10378extern __inline __m512
10379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10381{
10382 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383 (__v16sf) __B,
10384 (__v16sf)
0b192937 10385 _mm512_undefined_ps (),
756c5857
AI
10386 (__mmask16) -1);
10387}
10388
10389extern __inline __m512
10390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10391_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10392{
10393 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10394 (__v16sf) __B,
10395 (__v16sf) __W,
10396 (__mmask16) __U);
10397}
10398
10399extern __inline __m512
10400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10402{
10403 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10404 (__v16sf) __B,
10405 (__v16sf)
10406 _mm512_setzero_ps (),
10407 (__mmask16) __U);
10408}
10409
075691af
AI
10410#ifdef __OPTIMIZE__
10411extern __inline __m128d
10412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10413_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10414{
10415 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10416 (__v2df) __B,
10417 __R);
10418}
10419
10420extern __inline __m128
10421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10422_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10423{
10424 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10425 (__v4sf) __B,
10426 __R);
10427}
10428
10429extern __inline __m128d
10430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10432{
10433 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10434 (__v2df) __B,
10435 __R);
10436}
10437
10438extern __inline __m128
10439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10440_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10441{
10442 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10443 (__v4sf) __B,
10444 __R);
10445}
10446
10447#else
/* Macro forms of the scalar max/min round intrinsics, used when
   __OPTIMIZE__ is not defined.  Each must expand to the same builtin
   as the inline function of the same name in the __OPTIMIZE__ branch.
   They previously expanded to the add/sub round builtins, so builds
   without optimization computed a sum or difference instead of a
   maximum or minimum.  A and B are the two vector operands, C is the
   rounding argument passed straight to the builtin.  */
#define _mm_max_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_maxsd_round(A, B, C)

#define _mm_max_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_maxss_round(A, B, C)

#define _mm_min_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_minsd_round(A, B, C)

#define _mm_min_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_minss_round(A, B, C)
10459#endif
10460
756c5857
AI
10461extern __inline __m512d
10462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10463_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10464{
10465 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10466 (__v8df) __W,
10467 (__mmask8) __U);
10468}
10469
10470extern __inline __m512
10471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10472_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10473{
10474 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10475 (__v16sf) __W,
10476 (__mmask16) __U);
10477}
10478
10479extern __inline __m512i
10480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10482{
10483 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10484 (__v8di) __W,
10485 (__mmask8) __U);
10486}
10487
10488extern __inline __m512i
10489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10491{
10492 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10493 (__v16si) __W,
10494 (__mmask16) __U);
10495}
10496
075691af
AI
10497#ifdef __OPTIMIZE__
10498extern __inline __m128d
10499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10501{
10502 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10503 (__v2df) __A,
10504 (__v2df) __B,
10505 __R);
10506}
10507
10508extern __inline __m128
10509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10510_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10511{
10512 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10513 (__v4sf) __A,
10514 (__v4sf) __B,
10515 __R);
10516}
10517
10518extern __inline __m128d
10519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10521{
10522 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10523 (__v2df) __A,
10524 -(__v2df) __B,
10525 __R);
10526}
10527
10528extern __inline __m128
10529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10531{
10532 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10533 (__v4sf) __A,
10534 -(__v4sf) __B,
10535 __R);
10536}
10537
10538extern __inline __m128d
10539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10541{
10542 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10543 -(__v2df) __A,
10544 (__v2df) __B,
10545 __R);
10546}
10547
10548extern __inline __m128
10549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10551{
10552 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10553 -(__v4sf) __A,
10554 (__v4sf) __B,
10555 __R);
10556}
10557
10558extern __inline __m128d
10559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10560_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10561{
10562 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10563 -(__v2df) __A,
10564 -(__v2df) __B,
10565 __R);
10566}
10567
10568extern __inline __m128
10569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10570_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10571{
10572 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10573 -(__v4sf) __A,
10574 -(__v4sf) __B,
10575 __R);
10576}
10577#else
/* Macro forms of the scalar fused multiply-add round intrinsics, used
   when __OPTIMIZE__ is not defined.  All eight expand to the vfmadd
   builtin; the fmsub variants negate the third operand, the fnmadd
   variants the second, and the fnmsub variants both.  */
#define _mm_fmadd_round_sd(W, X, Y, RND) \
  (__m128d)__builtin_ia32_vfmaddsd3_round(W, X, Y, RND)

#define _mm_fmadd_round_ss(W, X, Y, RND) \
  (__m128)__builtin_ia32_vfmaddss3_round(W, X, Y, RND)

#define _mm_fmsub_round_sd(W, X, Y, RND) \
  (__m128d)__builtin_ia32_vfmaddsd3_round(W, X, -(Y), RND)

#define _mm_fmsub_round_ss(W, X, Y, RND) \
  (__m128)__builtin_ia32_vfmaddss3_round(W, X, -(Y), RND)

#define _mm_fnmadd_round_sd(W, X, Y, RND) \
  (__m128d)__builtin_ia32_vfmaddsd3_round(W, -(X), Y, RND)

#define _mm_fnmadd_round_ss(W, X, Y, RND) \
  (__m128)__builtin_ia32_vfmaddss3_round(W, -(X), Y, RND)

#define _mm_fnmsub_round_sd(W, X, Y, RND) \
  (__m128d)__builtin_ia32_vfmaddsd3_round(W, -(X), -(Y), RND)

#define _mm_fnmsub_round_ss(W, X, Y, RND) \
  (__m128)__builtin_ia32_vfmaddss3_round(W, -(X), -(Y), RND)
10601#endif
10602
756c5857
AI
10603#ifdef __OPTIMIZE__
10604extern __inline int
10605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10606_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10607{
10608 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10609}
10610
10611extern __inline int
10612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10613_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10614{
10615 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10616}
10617#else
/* Macro forms of the comi round intrinsics, used when __OPTIMIZE__ is
   not defined: the four arguments go to the builtin in order.  */
#define _mm_comi_round_ss(X, Y, P, R) \
  __builtin_ia32_vcomiss(X, Y, P, R)
#define _mm_comi_round_sd(X, Y, P, R) \
  __builtin_ia32_vcomisd(X, Y, P, R)
10622#endif
10623
10624extern __inline __m512d
10625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626_mm512_sqrt_pd (__m512d __A)
10627{
10628 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629 (__v8df)
0b192937 10630 _mm512_undefined_pd (),
756c5857
AI
10631 (__mmask8) -1,
10632 _MM_FROUND_CUR_DIRECTION);
10633}
10634
10635extern __inline __m512d
10636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10637_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10638{
10639 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10640 (__v8df) __W,
10641 (__mmask8) __U,
10642 _MM_FROUND_CUR_DIRECTION);
10643}
10644
10645extern __inline __m512d
10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10648{
10649 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10650 (__v8df)
10651 _mm512_setzero_pd (),
10652 (__mmask8) __U,
10653 _MM_FROUND_CUR_DIRECTION);
10654}
10655
10656extern __inline __m512
10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658_mm512_sqrt_ps (__m512 __A)
10659{
10660 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661 (__v16sf)
0b192937 10662 _mm512_undefined_ps (),
756c5857
AI
10663 (__mmask16) -1,
10664 _MM_FROUND_CUR_DIRECTION);
10665}
10666
10667extern __inline __m512
10668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10670{
10671 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10672 (__v16sf) __W,
10673 (__mmask16) __U,
10674 _MM_FROUND_CUR_DIRECTION);
10675}
10676
10677extern __inline __m512
10678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10680{
10681 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10682 (__v16sf)
10683 _mm512_setzero_ps (),
10684 (__mmask16) __U,
10685 _MM_FROUND_CUR_DIRECTION);
10686}
10687
10688extern __inline __m512d
10689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10690_mm512_add_pd (__m512d __A, __m512d __B)
10691{
2069d6fc 10692 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
10693}
10694
10695extern __inline __m512d
10696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10698{
10699 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10700 (__v8df) __B,
10701 (__v8df) __W,
10702 (__mmask8) __U,
10703 _MM_FROUND_CUR_DIRECTION);
10704}
10705
10706extern __inline __m512d
10707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10708_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10709{
10710 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10711 (__v8df) __B,
10712 (__v8df)
10713 _mm512_setzero_pd (),
10714 (__mmask8) __U,
10715 _MM_FROUND_CUR_DIRECTION);
10716}
10717
10718extern __inline __m512
10719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10720_mm512_add_ps (__m512 __A, __m512 __B)
10721{
2069d6fc 10722 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
10723}
10724
10725extern __inline __m512
10726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10728{
10729 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730 (__v16sf) __B,
10731 (__v16sf) __W,
10732 (__mmask16) __U,
10733 _MM_FROUND_CUR_DIRECTION);
10734}
10735
10736extern __inline __m512
10737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10738_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10739{
10740 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10741 (__v16sf) __B,
10742 (__v16sf)
10743 _mm512_setzero_ps (),
10744 (__mmask16) __U,
10745 _MM_FROUND_CUR_DIRECTION);
10746}
10747
10748extern __inline __m512d
10749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750_mm512_sub_pd (__m512d __A, __m512d __B)
10751{
2069d6fc 10752 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
10753}
10754
10755extern __inline __m512d
10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10758{
10759 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10760 (__v8df) __B,
10761 (__v8df) __W,
10762 (__mmask8) __U,
10763 _MM_FROUND_CUR_DIRECTION);
10764}
10765
10766extern __inline __m512d
10767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10768_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10769{
10770 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10771 (__v8df) __B,
10772 (__v8df)
10773 _mm512_setzero_pd (),
10774 (__mmask8) __U,
10775 _MM_FROUND_CUR_DIRECTION);
10776}
10777
10778extern __inline __m512
10779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10780_mm512_sub_ps (__m512 __A, __m512 __B)
10781{
2069d6fc 10782 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
10783}
10784
10785extern __inline __m512
10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10788{
10789 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10790 (__v16sf) __B,
10791 (__v16sf) __W,
10792 (__mmask16) __U,
10793 _MM_FROUND_CUR_DIRECTION);
10794}
10795
10796extern __inline __m512
10797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10798_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10799{
10800 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10801 (__v16sf) __B,
10802 (__v16sf)
10803 _mm512_setzero_ps (),
10804 (__mmask16) __U,
10805 _MM_FROUND_CUR_DIRECTION);
10806}
10807
10808extern __inline __m512d
10809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10810_mm512_mul_pd (__m512d __A, __m512d __B)
10811{
2069d6fc 10812 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
10813}
10814
10815extern __inline __m512d
10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10818{
10819 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10820 (__v8df) __B,
10821 (__v8df) __W,
10822 (__mmask8) __U,
10823 _MM_FROUND_CUR_DIRECTION);
10824}
10825
10826extern __inline __m512d
10827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10828_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10829{
10830 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10831 (__v8df) __B,
10832 (__v8df)
10833 _mm512_setzero_pd (),
10834 (__mmask8) __U,
10835 _MM_FROUND_CUR_DIRECTION);
10836}
10837
10838extern __inline __m512
10839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10840_mm512_mul_ps (__m512 __A, __m512 __B)
10841{
2069d6fc 10842 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
10843}
10844
10845extern __inline __m512
10846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10847_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10848{
10849 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10850 (__v16sf) __B,
10851 (__v16sf) __W,
10852 (__mmask16) __U,
10853 _MM_FROUND_CUR_DIRECTION);
10854}
10855
10856extern __inline __m512
10857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10858_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10859{
10860 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10861 (__v16sf) __B,
10862 (__v16sf)
10863 _mm512_setzero_ps (),
10864 (__mmask16) __U,
10865 _MM_FROUND_CUR_DIRECTION);
10866}
10867
10868extern __inline __m512d
10869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10870_mm512_div_pd (__m512d __M, __m512d __V)
10871{
2069d6fc 10872 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
10873}
10874
10875extern __inline __m512d
10876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10878{
10879 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10880 (__v8df) __V,
10881 (__v8df) __W,
10882 (__mmask8) __U,
10883 _MM_FROUND_CUR_DIRECTION);
10884}
10885
10886extern __inline __m512d
10887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10889{
10890 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10891 (__v8df) __V,
10892 (__v8df)
10893 _mm512_setzero_pd (),
10894 (__mmask8) __U,
10895 _MM_FROUND_CUR_DIRECTION);
10896}
10897
10898extern __inline __m512
10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900_mm512_div_ps (__m512 __A, __m512 __B)
10901{
2069d6fc 10902 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
10903}
10904
10905extern __inline __m512
10906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10907_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10908{
10909 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10910 (__v16sf) __B,
10911 (__v16sf) __W,
10912 (__mmask16) __U,
10913 _MM_FROUND_CUR_DIRECTION);
10914}
10915
10916extern __inline __m512
10917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10918_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10919{
10920 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10921 (__v16sf) __B,
10922 (__v16sf)
10923 _mm512_setzero_ps (),
10924 (__mmask16) __U,
10925 _MM_FROUND_CUR_DIRECTION);
10926}
10927
10928extern __inline __m512d
10929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10930_mm512_max_pd (__m512d __A, __m512d __B)
10931{
10932 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10933 (__v8df) __B,
10934 (__v8df)
0b192937 10935 _mm512_undefined_pd (),
756c5857
AI
10936 (__mmask8) -1,
10937 _MM_FROUND_CUR_DIRECTION);
10938}
10939
10940extern __inline __m512d
10941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10943{
10944 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10945 (__v8df) __B,
10946 (__v8df) __W,
10947 (__mmask8) __U,
10948 _MM_FROUND_CUR_DIRECTION);
10949}
10950
10951extern __inline __m512d
10952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10954{
10955 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10956 (__v8df) __B,
10957 (__v8df)
10958 _mm512_setzero_pd (),
10959 (__mmask8) __U,
10960 _MM_FROUND_CUR_DIRECTION);
10961}
10962
10963extern __inline __m512
10964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10965_mm512_max_ps (__m512 __A, __m512 __B)
10966{
10967 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10968 (__v16sf) __B,
10969 (__v16sf)
0b192937 10970 _mm512_undefined_ps (),
756c5857
AI
10971 (__mmask16) -1,
10972 _MM_FROUND_CUR_DIRECTION);
10973}
10974
10975extern __inline __m512
10976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10978{
10979 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10980 (__v16sf) __B,
10981 (__v16sf) __W,
10982 (__mmask16) __U,
10983 _MM_FROUND_CUR_DIRECTION);
10984}
10985
10986extern __inline __m512
10987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10989{
10990 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10991 (__v16sf) __B,
10992 (__v16sf)
10993 _mm512_setzero_ps (),
10994 (__mmask16) __U,
10995 _MM_FROUND_CUR_DIRECTION);
10996}
10997
10998extern __inline __m512d
10999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000_mm512_min_pd (__m512d __A, __m512d __B)
11001{
11002 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11003 (__v8df) __B,
11004 (__v8df)
0b192937 11005 _mm512_undefined_pd (),
756c5857
AI
11006 (__mmask8) -1,
11007 _MM_FROUND_CUR_DIRECTION);
11008}
11009
11010extern __inline __m512d
11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11013{
11014 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11015 (__v8df) __B,
11016 (__v8df) __W,
11017 (__mmask8) __U,
11018 _MM_FROUND_CUR_DIRECTION);
11019}
11020
11021extern __inline __m512d
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11024{
11025 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11026 (__v8df) __B,
11027 (__v8df)
11028 _mm512_setzero_pd (),
11029 (__mmask8) __U,
11030 _MM_FROUND_CUR_DIRECTION);
11031}
11032
11033extern __inline __m512
11034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035_mm512_min_ps (__m512 __A, __m512 __B)
11036{
11037 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11038 (__v16sf) __B,
11039 (__v16sf)
0b192937 11040 _mm512_undefined_ps (),
756c5857
AI
11041 (__mmask16) -1,
11042 _MM_FROUND_CUR_DIRECTION);
11043}
11044
11045extern __inline __m512
11046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11048{
11049 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11050 (__v16sf) __B,
11051 (__v16sf) __W,
11052 (__mmask16) __U,
11053 _MM_FROUND_CUR_DIRECTION);
11054}
11055
11056extern __inline __m512
11057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11058_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11059{
11060 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11061 (__v16sf) __B,
11062 (__v16sf)
11063 _mm512_setzero_ps (),
11064 (__mmask16) __U,
11065 _MM_FROUND_CUR_DIRECTION);
11066}
11067
11068extern __inline __m512d
11069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070_mm512_scalef_pd (__m512d __A, __m512d __B)
11071{
11072 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11073 (__v8df) __B,
11074 (__v8df)
0b192937 11075 _mm512_undefined_pd (),
756c5857
AI
11076 (__mmask8) -1,
11077 _MM_FROUND_CUR_DIRECTION);
11078}
11079
11080extern __inline __m512d
11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11083{
11084 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11085 (__v8df) __B,
11086 (__v8df) __W,
11087 (__mmask8) __U,
11088 _MM_FROUND_CUR_DIRECTION);
11089}
11090
11091extern __inline __m512d
11092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11093_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11094{
11095 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11096 (__v8df) __B,
11097 (__v8df)
11098 _mm512_setzero_pd (),
11099 (__mmask8) __U,
11100 _MM_FROUND_CUR_DIRECTION);
11101}
11102
11103extern __inline __m512
11104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11105_mm512_scalef_ps (__m512 __A, __m512 __B)
11106{
11107 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11108 (__v16sf) __B,
11109 (__v16sf)
0b192937 11110 _mm512_undefined_ps (),
756c5857
AI
11111 (__mmask16) -1,
11112 _MM_FROUND_CUR_DIRECTION);
11113}
11114
11115extern __inline __m512
11116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11118{
11119 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11120 (__v16sf) __B,
11121 (__v16sf) __W,
11122 (__mmask16) __U,
11123 _MM_FROUND_CUR_DIRECTION);
11124}
11125
11126extern __inline __m512
11127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11129{
11130 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11131 (__v16sf) __B,
11132 (__v16sf)
11133 _mm512_setzero_ps (),
11134 (__mmask16) __U,
11135 _MM_FROUND_CUR_DIRECTION);
11136}
11137
075691af
AI
11138extern __inline __m128d
11139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11140_mm_scalef_sd (__m128d __A, __m128d __B)
11141{
11142 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11143 (__v2df) __B,
11144 _MM_FROUND_CUR_DIRECTION);
11145}
11146
11147extern __inline __m128
11148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11149_mm_scalef_ss (__m128 __A, __m128 __B)
11150{
11151 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11152 (__v4sf) __B,
11153 _MM_FROUND_CUR_DIRECTION);
11154}
11155
756c5857
AI
11156extern __inline __m512d
11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11159{
11160 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11161 (__v8df) __B,
11162 (__v8df) __C,
11163 (__mmask8) -1,
11164 _MM_FROUND_CUR_DIRECTION);
11165}
11166
11167extern __inline __m512d
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11170{
11171 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11172 (__v8df) __B,
11173 (__v8df) __C,
11174 (__mmask8) __U,
11175 _MM_FROUND_CUR_DIRECTION);
11176}
11177
11178extern __inline __m512d
11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11181{
11182 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11183 (__v8df) __B,
11184 (__v8df) __C,
11185 (__mmask8) __U,
11186 _MM_FROUND_CUR_DIRECTION);
11187}
11188
11189extern __inline __m512d
11190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11192{
11193 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11194 (__v8df) __B,
11195 (__v8df) __C,
11196 (__mmask8) __U,
11197 _MM_FROUND_CUR_DIRECTION);
11198}
11199
11200extern __inline __m512
11201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11202_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11203{
11204 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11205 (__v16sf) __B,
11206 (__v16sf) __C,
11207 (__mmask16) -1,
11208 _MM_FROUND_CUR_DIRECTION);
11209}
11210
11211extern __inline __m512
11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11214{
11215 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11216 (__v16sf) __B,
11217 (__v16sf) __C,
11218 (__mmask16) __U,
11219 _MM_FROUND_CUR_DIRECTION);
11220}
11221
11222extern __inline __m512
11223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11225{
11226 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11227 (__v16sf) __B,
11228 (__v16sf) __C,
11229 (__mmask16) __U,
11230 _MM_FROUND_CUR_DIRECTION);
11231}
11232
11233extern __inline __m512
11234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11236{
11237 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11238 (__v16sf) __B,
11239 (__v16sf) __C,
11240 (__mmask16) __U,
11241 _MM_FROUND_CUR_DIRECTION);
11242}
11243
11244extern __inline __m512d
11245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11247{
11248 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11249 (__v8df) __B,
11250 -(__v8df) __C,
11251 (__mmask8) -1,
11252 _MM_FROUND_CUR_DIRECTION);
11253}
11254
11255extern __inline __m512d
11256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11258{
11259 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11260 (__v8df) __B,
11261 -(__v8df) __C,
11262 (__mmask8) __U,
11263 _MM_FROUND_CUR_DIRECTION);
11264}
11265
11266extern __inline __m512d
11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11269{
11270 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11271 (__v8df) __B,
11272 (__v8df) __C,
11273 (__mmask8) __U,
11274 _MM_FROUND_CUR_DIRECTION);
11275}
11276
11277extern __inline __m512d
11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11280{
11281 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11282 (__v8df) __B,
11283 -(__v8df) __C,
11284 (__mmask8) __U,
11285 _MM_FROUND_CUR_DIRECTION);
11286}
11287
11288extern __inline __m512
11289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11291{
11292 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11293 (__v16sf) __B,
11294 -(__v16sf) __C,
11295 (__mmask16) -1,
11296 _MM_FROUND_CUR_DIRECTION);
11297}
11298
11299extern __inline __m512
11300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11302{
11303 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11304 (__v16sf) __B,
11305 -(__v16sf) __C,
11306 (__mmask16) __U,
11307 _MM_FROUND_CUR_DIRECTION);
11308}
11309
11310extern __inline __m512
11311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11312_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11313{
11314 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11315 (__v16sf) __B,
11316 (__v16sf) __C,
11317 (__mmask16) __U,
11318 _MM_FROUND_CUR_DIRECTION);
11319}
11320
11321extern __inline __m512
11322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11323_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11324{
11325 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11326 (__v16sf) __B,
11327 -(__v16sf) __C,
11328 (__mmask16) __U,
11329 _MM_FROUND_CUR_DIRECTION);
11330}
11331
11332extern __inline __m512d
11333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11335{
11336 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11337 (__v8df) __B,
11338 (__v8df) __C,
11339 (__mmask8) -1,
11340 _MM_FROUND_CUR_DIRECTION);
11341}
11342
11343extern __inline __m512d
11344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11345_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11346{
11347 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11348 (__v8df) __B,
11349 (__v8df) __C,
11350 (__mmask8) __U,
11351 _MM_FROUND_CUR_DIRECTION);
11352}
11353
11354extern __inline __m512d
11355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11357{
11358 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11359 (__v8df) __B,
11360 (__v8df) __C,
11361 (__mmask8) __U,
11362 _MM_FROUND_CUR_DIRECTION);
11363}
11364
11365extern __inline __m512d
11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11368{
11369 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11370 (__v8df) __B,
11371 (__v8df) __C,
11372 (__mmask8) __U,
11373 _MM_FROUND_CUR_DIRECTION);
11374}
11375
11376extern __inline __m512
11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11379{
11380 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11381 (__v16sf) __B,
11382 (__v16sf) __C,
11383 (__mmask16) -1,
11384 _MM_FROUND_CUR_DIRECTION);
11385}
11386
11387extern __inline __m512
11388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11390{
11391 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11392 (__v16sf) __B,
11393 (__v16sf) __C,
11394 (__mmask16) __U,
11395 _MM_FROUND_CUR_DIRECTION);
11396}
11397
11398extern __inline __m512
11399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11401{
11402 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11403 (__v16sf) __B,
11404 (__v16sf) __C,
11405 (__mmask16) __U,
11406 _MM_FROUND_CUR_DIRECTION);
11407}
11408
11409extern __inline __m512
11410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11412{
11413 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11414 (__v16sf) __B,
11415 (__v16sf) __C,
11416 (__mmask16) __U,
11417 _MM_FROUND_CUR_DIRECTION);
11418}
11419
11420extern __inline __m512d
11421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11423{
11424 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11425 (__v8df) __B,
11426 -(__v8df) __C,
11427 (__mmask8) -1,
11428 _MM_FROUND_CUR_DIRECTION);
11429}
11430
11431extern __inline __m512d
11432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11434{
11435 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11436 (__v8df) __B,
11437 -(__v8df) __C,
11438 (__mmask8) __U,
11439 _MM_FROUND_CUR_DIRECTION);
11440}
11441
11442extern __inline __m512d
11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11445{
11446 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11447 (__v8df) __B,
11448 (__v8df) __C,
11449 (__mmask8) __U,
11450 _MM_FROUND_CUR_DIRECTION);
11451}
11452
11453extern __inline __m512d
11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11456{
11457 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11458 (__v8df) __B,
11459 -(__v8df) __C,
11460 (__mmask8) __U,
11461 _MM_FROUND_CUR_DIRECTION);
11462}
11463
11464extern __inline __m512
11465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11467{
11468 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11469 (__v16sf) __B,
11470 -(__v16sf) __C,
11471 (__mmask16) -1,
11472 _MM_FROUND_CUR_DIRECTION);
11473}
11474
11475extern __inline __m512
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11478{
11479 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11480 (__v16sf) __B,
11481 -(__v16sf) __C,
11482 (__mmask16) __U,
11483 _MM_FROUND_CUR_DIRECTION);
11484}
11485
11486extern __inline __m512
11487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11488_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11489{
11490 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11491 (__v16sf) __B,
11492 (__v16sf) __C,
11493 (__mmask16) __U,
11494 _MM_FROUND_CUR_DIRECTION);
11495}
11496
11497extern __inline __m512
11498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11499_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11500{
11501 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11502 (__v16sf) __B,
11503 -(__v16sf) __C,
11504 (__mmask16) __U,
11505 _MM_FROUND_CUR_DIRECTION);
11506}
11507
11508extern __inline __m512d
11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11511{
11512 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11513 (__v8df) __B,
11514 (__v8df) __C,
11515 (__mmask8) -1,
11516 _MM_FROUND_CUR_DIRECTION);
11517}
11518
11519extern __inline __m512d
11520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11522{
11523 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11524 (__v8df) __B,
11525 (__v8df) __C,
11526 (__mmask8) __U,
11527 _MM_FROUND_CUR_DIRECTION);
11528}
11529
11530extern __inline __m512d
11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11533{
11534 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11535 (__v8df) __B,
11536 (__v8df) __C,
11537 (__mmask8) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11539}
11540
11541extern __inline __m512d
11542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11544{
11545 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11546 (__v8df) __B,
11547 (__v8df) __C,
11548 (__mmask8) __U,
11549 _MM_FROUND_CUR_DIRECTION);
11550}
11551
11552extern __inline __m512
11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11555{
11556 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11557 (__v16sf) __B,
11558 (__v16sf) __C,
11559 (__mmask16) -1,
11560 _MM_FROUND_CUR_DIRECTION);
11561}
11562
11563extern __inline __m512
11564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11566{
11567 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11568 (__v16sf) __B,
11569 (__v16sf) __C,
11570 (__mmask16) __U,
11571 _MM_FROUND_CUR_DIRECTION);
11572}
11573
11574extern __inline __m512
11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11577{
11578 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11579 (__v16sf) __B,
11580 (__v16sf) __C,
11581 (__mmask16) __U,
11582 _MM_FROUND_CUR_DIRECTION);
11583}
11584
11585extern __inline __m512
11586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11588{
11589 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11590 (__v16sf) __B,
11591 (__v16sf) __C,
11592 (__mmask16) __U,
11593 _MM_FROUND_CUR_DIRECTION);
11594}
11595
11596extern __inline __m512d
11597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11599{
11600 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11601 (__v8df) __B,
11602 -(__v8df) __C,
11603 (__mmask8) -1,
11604 _MM_FROUND_CUR_DIRECTION);
11605}
11606
11607extern __inline __m512d
11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11610{
11611 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11612 (__v8df) __B,
11613 (__v8df) __C,
11614 (__mmask8) __U,
11615 _MM_FROUND_CUR_DIRECTION);
11616}
11617
11618extern __inline __m512d
11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11621{
11622 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11623 (__v8df) __B,
11624 (__v8df) __C,
11625 (__mmask8) __U,
11626 _MM_FROUND_CUR_DIRECTION);
11627}
11628
11629extern __inline __m512d
11630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11632{
11633 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11634 (__v8df) __B,
11635 -(__v8df) __C,
11636 (__mmask8) __U,
11637 _MM_FROUND_CUR_DIRECTION);
11638}
11639
11640extern __inline __m512
11641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11643{
11644 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11645 (__v16sf) __B,
11646 -(__v16sf) __C,
11647 (__mmask16) -1,
11648 _MM_FROUND_CUR_DIRECTION);
11649}
11650
11651extern __inline __m512
11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11654{
11655 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11656 (__v16sf) __B,
11657 (__v16sf) __C,
11658 (__mmask16) __U,
11659 _MM_FROUND_CUR_DIRECTION);
11660}
11661
11662extern __inline __m512
11663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11664_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11665{
11666 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11667 (__v16sf) __B,
11668 (__v16sf) __C,
11669 (__mmask16) __U,
11670 _MM_FROUND_CUR_DIRECTION);
11671}
11672
11673extern __inline __m512
11674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11675_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11676{
11677 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11678 (__v16sf) __B,
11679 -(__v16sf) __C,
11680 (__mmask16) __U,
11681 _MM_FROUND_CUR_DIRECTION);
11682}
11683
11684extern __inline __m256i
11685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11686_mm512_cvttpd_epi32 (__m512d __A)
11687{
11688 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11689 (__v8si)
0b192937 11690 _mm256_undefined_si256 (),
756c5857
AI
11691 (__mmask8) -1,
11692 _MM_FROUND_CUR_DIRECTION);
11693}
11694
11695extern __inline __m256i
11696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11698{
11699 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11700 (__v8si) __W,
11701 (__mmask8) __U,
11702 _MM_FROUND_CUR_DIRECTION);
11703}
11704
11705extern __inline __m256i
11706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11708{
11709 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11710 (__v8si)
11711 _mm256_setzero_si256 (),
11712 (__mmask8) __U,
11713 _MM_FROUND_CUR_DIRECTION);
11714}
11715
11716extern __inline __m256i
11717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718_mm512_cvttpd_epu32 (__m512d __A)
11719{
11720 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11721 (__v8si)
0b192937 11722 _mm256_undefined_si256 (),
756c5857
AI
11723 (__mmask8) -1,
11724 _MM_FROUND_CUR_DIRECTION);
11725}
11726
11727extern __inline __m256i
11728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11729_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11730{
11731 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11732 (__v8si) __W,
11733 (__mmask8) __U,
11734 _MM_FROUND_CUR_DIRECTION);
11735}
11736
11737extern __inline __m256i
11738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11740{
11741 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11742 (__v8si)
11743 _mm256_setzero_si256 (),
11744 (__mmask8) __U,
11745 _MM_FROUND_CUR_DIRECTION);
11746}
11747
11748extern __inline __m256i
11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750_mm512_cvtpd_epi32 (__m512d __A)
11751{
11752 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11753 (__v8si)
0b192937 11754 _mm256_undefined_si256 (),
756c5857
AI
11755 (__mmask8) -1,
11756 _MM_FROUND_CUR_DIRECTION);
11757}
11758
11759extern __inline __m256i
11760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11762{
11763 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11764 (__v8si) __W,
11765 (__mmask8) __U,
11766 _MM_FROUND_CUR_DIRECTION);
11767}
11768
11769extern __inline __m256i
11770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11771_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11772{
11773 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11774 (__v8si)
11775 _mm256_setzero_si256 (),
11776 (__mmask8) __U,
11777 _MM_FROUND_CUR_DIRECTION);
11778}
11779
11780extern __inline __m256i
11781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782_mm512_cvtpd_epu32 (__m512d __A)
11783{
11784 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11785 (__v8si)
0b192937 11786 _mm256_undefined_si256 (),
756c5857
AI
11787 (__mmask8) -1,
11788 _MM_FROUND_CUR_DIRECTION);
11789}
11790
11791extern __inline __m256i
11792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11794{
11795 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11796 (__v8si) __W,
11797 (__mmask8) __U,
11798 _MM_FROUND_CUR_DIRECTION);
11799}
11800
11801extern __inline __m256i
11802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11804{
11805 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11806 (__v8si)
11807 _mm256_setzero_si256 (),
11808 (__mmask8) __U,
11809 _MM_FROUND_CUR_DIRECTION);
11810}
11811
11812extern __inline __m512i
11813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm512_cvttps_epi32 (__m512 __A)
11815{
11816 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11817 (__v16si)
4271e5cb 11818 _mm512_undefined_epi32 (),
756c5857
AI
11819 (__mmask16) -1,
11820 _MM_FROUND_CUR_DIRECTION);
11821}
11822
11823extern __inline __m512i
11824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11825_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11826{
11827 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11828 (__v16si) __W,
11829 (__mmask16) __U,
11830 _MM_FROUND_CUR_DIRECTION);
11831}
11832
11833extern __inline __m512i
11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11836{
11837 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11838 (__v16si)
11839 _mm512_setzero_si512 (),
11840 (__mmask16) __U,
11841 _MM_FROUND_CUR_DIRECTION);
11842}
11843
11844extern __inline __m512i
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm512_cvttps_epu32 (__m512 __A)
11847{
11848 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11849 (__v16si)
4271e5cb 11850 _mm512_undefined_epi32 (),
756c5857
AI
11851 (__mmask16) -1,
11852 _MM_FROUND_CUR_DIRECTION);
11853}
11854
11855extern __inline __m512i
11856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11858{
11859 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11860 (__v16si) __W,
11861 (__mmask16) __U,
11862 _MM_FROUND_CUR_DIRECTION);
11863}
11864
11865extern __inline __m512i
11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11868{
11869 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11870 (__v16si)
11871 _mm512_setzero_si512 (),
11872 (__mmask16) __U,
11873 _MM_FROUND_CUR_DIRECTION);
11874}
11875
11876extern __inline __m512i
11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878_mm512_cvtps_epi32 (__m512 __A)
11879{
11880 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11881 (__v16si)
4271e5cb 11882 _mm512_undefined_epi32 (),
756c5857
AI
11883 (__mmask16) -1,
11884 _MM_FROUND_CUR_DIRECTION);
11885}
11886
11887extern __inline __m512i
11888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11889_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11890{
11891 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11892 (__v16si) __W,
11893 (__mmask16) __U,
11894 _MM_FROUND_CUR_DIRECTION);
11895}
11896
11897extern __inline __m512i
11898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11899_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11900{
11901 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11902 (__v16si)
11903 _mm512_setzero_si512 (),
11904 (__mmask16) __U,
11905 _MM_FROUND_CUR_DIRECTION);
11906}
11907
11908extern __inline __m512i
11909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910_mm512_cvtps_epu32 (__m512 __A)
11911{
11912 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11913 (__v16si)
4271e5cb 11914 _mm512_undefined_epi32 (),
756c5857
AI
11915 (__mmask16) -1,
11916 _MM_FROUND_CUR_DIRECTION);
11917}
11918
11919extern __inline __m512i
11920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11921_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11922{
11923 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11924 (__v16si) __W,
11925 (__mmask16) __U,
11926 _MM_FROUND_CUR_DIRECTION);
11927}
11928
11929extern __inline __m512i
11930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11932{
11933 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11934 (__v16si)
11935 _mm512_setzero_si512 (),
11936 (__mmask16) __U,
11937 _MM_FROUND_CUR_DIRECTION);
11938}
11939
11940#ifdef __x86_64__
11941extern __inline __m128
11942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11943_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11944{
11945 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11946 _MM_FROUND_CUR_DIRECTION);
11947}
11948
11949extern __inline __m128d
11950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11951_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11952{
11953 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11954 _MM_FROUND_CUR_DIRECTION);
11955}
11956#endif
11957
11958extern __inline __m128
11959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11960_mm_cvtu32_ss (__m128 __A, unsigned __B)
11961{
11962 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11963 _MM_FROUND_CUR_DIRECTION);
11964}
11965
11966extern __inline __m512
11967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968_mm512_cvtepi32_ps (__m512i __A)
11969{
11970 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11971 (__v16sf)
0b192937 11972 _mm512_undefined_ps (),
756c5857
AI
11973 (__mmask16) -1,
11974 _MM_FROUND_CUR_DIRECTION);
11975}
11976
11977extern __inline __m512
11978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11980{
11981 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11982 (__v16sf) __W,
11983 (__mmask16) __U,
11984 _MM_FROUND_CUR_DIRECTION);
11985}
11986
11987extern __inline __m512
11988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11990{
11991 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11992 (__v16sf)
11993 _mm512_setzero_ps (),
11994 (__mmask16) __U,
11995 _MM_FROUND_CUR_DIRECTION);
11996}
11997
11998extern __inline __m512
11999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000_mm512_cvtepu32_ps (__m512i __A)
12001{
12002 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12003 (__v16sf)
0b192937 12004 _mm512_undefined_ps (),
756c5857
AI
12005 (__mmask16) -1,
12006 _MM_FROUND_CUR_DIRECTION);
12007}
12008
12009extern __inline __m512
12010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12011_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12012{
12013 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12014 (__v16sf) __W,
12015 (__mmask16) __U,
12016 _MM_FROUND_CUR_DIRECTION);
12017}
12018
12019extern __inline __m512
12020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12022{
12023 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12024 (__v16sf)
12025 _mm512_setzero_ps (),
12026 (__mmask16) __U,
12027 _MM_FROUND_CUR_DIRECTION);
12028}
12029
12030#ifdef __OPTIMIZE__
12031extern __inline __m512d
12032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12033_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12034{
12035 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12036 (__v8df) __B,
12037 (__v8di) __C,
12038 __imm,
12039 (__mmask8) -1,
12040 _MM_FROUND_CUR_DIRECTION);
12041}
12042
12043extern __inline __m512d
12044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12045_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12046 __m512i __C, const int __imm)
12047{
12048 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12049 (__v8df) __B,
12050 (__v8di) __C,
12051 __imm,
12052 (__mmask8) __U,
12053 _MM_FROUND_CUR_DIRECTION);
12054}
12055
12056extern __inline __m512d
12057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12058_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12059 __m512i __C, const int __imm)
12060{
12061 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12062 (__v8df) __B,
12063 (__v8di) __C,
12064 __imm,
12065 (__mmask8) __U,
12066 _MM_FROUND_CUR_DIRECTION);
12067}
12068
12069extern __inline __m512
12070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12071_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12072{
12073 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12074 (__v16sf) __B,
12075 (__v16si) __C,
12076 __imm,
12077 (__mmask16) -1,
12078 _MM_FROUND_CUR_DIRECTION);
12079}
12080
12081extern __inline __m512
12082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12083_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12084 __m512i __C, const int __imm)
12085{
12086 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12087 (__v16sf) __B,
12088 (__v16si) __C,
12089 __imm,
12090 (__mmask16) __U,
12091 _MM_FROUND_CUR_DIRECTION);
12092}
12093
12094extern __inline __m512
12095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12097 __m512i __C, const int __imm)
12098{
12099 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12100 (__v16sf) __B,
12101 (__v16si) __C,
12102 __imm,
12103 (__mmask16) __U,
12104 _MM_FROUND_CUR_DIRECTION);
12105}
12106
12107extern __inline __m128d
12108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12110{
12111 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12112 (__v2df) __B,
12113 (__v2di) __C, __imm,
12114 (__mmask8) -1,
12115 _MM_FROUND_CUR_DIRECTION);
12116}
12117
12118extern __inline __m128d
12119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12121 __m128i __C, const int __imm)
12122{
12123 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12124 (__v2df) __B,
12125 (__v2di) __C, __imm,
12126 (__mmask8) __U,
12127 _MM_FROUND_CUR_DIRECTION);
12128}
12129
12130extern __inline __m128d
12131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12132_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12133 __m128i __C, const int __imm)
12134{
12135 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12136 (__v2df) __B,
12137 (__v2di) __C,
12138 __imm,
12139 (__mmask8) __U,
12140 _MM_FROUND_CUR_DIRECTION);
12141}
12142
12143extern __inline __m128
12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12146{
12147 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12148 (__v4sf) __B,
12149 (__v4si) __C, __imm,
12150 (__mmask8) -1,
12151 _MM_FROUND_CUR_DIRECTION);
12152}
12153
12154extern __inline __m128
12155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12157 __m128i __C, const int __imm)
12158{
12159 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12160 (__v4sf) __B,
12161 (__v4si) __C, __imm,
12162 (__mmask8) __U,
12163 _MM_FROUND_CUR_DIRECTION);
12164}
12165
12166extern __inline __m128
12167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12168_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12169 __m128i __C, const int __imm)
12170{
12171 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12172 (__v4sf) __B,
12173 (__v4si) __C, __imm,
12174 (__mmask8) __U,
12175 _MM_FROUND_CUR_DIRECTION);
12176}
12177#else
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmpd512_mask with mask -1.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))
12182
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmpd512_mask with mask U.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
12187
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmpd512_maskz with mask U.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
12192
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmps512_mask with mask -1.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (__v16si)(__m512i)(Z), \
      (int)(C), \
      (__mmask16)(-1), \
      _MM_FROUND_CUR_DIRECTION))
12197
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmps512_mask with mask U.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (__v16si)(__m512i)(Z), \
      (int)(C), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
12202
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmps512_maskz with mask U.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (__v16si)(__m512i)(Z), \
      (int)(C), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
12207
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmsd_mask with mask -1.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))
12212
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmsd_mask with mask U.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
12217
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmsd_maskz with mask U.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
12222
/* Macro form (used when __OPTIMIZE__ is not defined): casts X, Y, Z
   and C to the builtin's operand types and calls
   __builtin_ia32_fixupimmss_mask with mask -1.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))
12227
12228#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12229 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12230 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12231 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12232
12233#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12234 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12235 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12236 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12237#endif
12238
12239#ifdef __x86_64__
12240extern __inline unsigned long long
12241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12242_mm_cvtss_u64 (__m128 __A)
12243{
12244 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12245 __A,
12246 _MM_FROUND_CUR_DIRECTION);
12247}
12248
12249extern __inline unsigned long long
12250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12251_mm_cvttss_u64 (__m128 __A)
12252{
12253 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12254 __A,
12255 _MM_FROUND_CUR_DIRECTION);
12256}
12257
12258extern __inline long long
12259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12260_mm_cvttss_i64 (__m128 __A)
12261{
12262 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12263 _MM_FROUND_CUR_DIRECTION);
12264}
12265#endif /* __x86_64__ */
12266
12267extern __inline unsigned
12268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269_mm_cvtss_u32 (__m128 __A)
12270{
12271 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12272 _MM_FROUND_CUR_DIRECTION);
12273}
12274
12275extern __inline unsigned
12276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12277_mm_cvttss_u32 (__m128 __A)
12278{
12279 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12280 _MM_FROUND_CUR_DIRECTION);
12281}
12282
12283extern __inline int
12284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12285_mm_cvttss_i32 (__m128 __A)
12286{
12287 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12288 _MM_FROUND_CUR_DIRECTION);
12289}
12290
12291#ifdef __x86_64__
12292extern __inline unsigned long long
12293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12294_mm_cvtsd_u64 (__m128d __A)
12295{
12296 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12297 __A,
12298 _MM_FROUND_CUR_DIRECTION);
12299}
12300
12301extern __inline unsigned long long
12302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12303_mm_cvttsd_u64 (__m128d __A)
12304{
12305 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12306 __A,
12307 _MM_FROUND_CUR_DIRECTION);
12308}
12309
12310extern __inline long long
12311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312_mm_cvttsd_i64 (__m128d __A)
12313{
12314 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12315 _MM_FROUND_CUR_DIRECTION);
12316}
12317#endif /* __x86_64__ */
12318
12319extern __inline unsigned
12320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12321_mm_cvtsd_u32 (__m128d __A)
12322{
12323 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12324 _MM_FROUND_CUR_DIRECTION);
12325}
12326
12327extern __inline unsigned
12328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12329_mm_cvttsd_u32 (__m128d __A)
12330{
12331 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12332 _MM_FROUND_CUR_DIRECTION);
12333}
12334
12335extern __inline int
12336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12337_mm_cvttsd_i32 (__m128d __A)
12338{
12339 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12340 _MM_FROUND_CUR_DIRECTION);
12341}
12342
12343extern __inline __m512d
12344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345_mm512_cvtps_pd (__m256 __A)
12346{
12347 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12348 (__v8df)
0b192937 12349 _mm512_undefined_pd (),
756c5857
AI
12350 (__mmask8) -1,
12351 _MM_FROUND_CUR_DIRECTION);
12352}
12353
12354extern __inline __m512d
12355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12356_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12357{
12358 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12359 (__v8df) __W,
12360 (__mmask8) __U,
12361 _MM_FROUND_CUR_DIRECTION);
12362}
12363
12364extern __inline __m512d
12365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12367{
12368 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12369 (__v8df)
12370 _mm512_setzero_pd (),
12371 (__mmask8) __U,
12372 _MM_FROUND_CUR_DIRECTION);
12373}
12374
12375extern __inline __m512
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm512_cvtph_ps (__m256i __A)
12378{
12379 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12380 (__v16sf)
0b192937 12381 _mm512_undefined_ps (),
756c5857
AI
12382 (__mmask16) -1,
12383 _MM_FROUND_CUR_DIRECTION);
12384}
12385
12386extern __inline __m512
12387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12389{
12390 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12391 (__v16sf) __W,
12392 (__mmask16) __U,
12393 _MM_FROUND_CUR_DIRECTION);
12394}
12395
12396extern __inline __m512
12397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12399{
12400 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12401 (__v16sf)
12402 _mm512_setzero_ps (),
12403 (__mmask16) __U,
12404 _MM_FROUND_CUR_DIRECTION);
12405}
12406
12407extern __inline __m256
12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409_mm512_cvtpd_ps (__m512d __A)
12410{
12411 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12412 (__v8sf)
0b192937 12413 _mm256_undefined_ps (),
756c5857
AI
12414 (__mmask8) -1,
12415 _MM_FROUND_CUR_DIRECTION);
12416}
12417
12418extern __inline __m256
12419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12420_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12421{
12422 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12423 (__v8sf) __W,
12424 (__mmask8) __U,
12425 _MM_FROUND_CUR_DIRECTION);
12426}
12427
12428extern __inline __m256
12429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12430_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12431{
12432 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12433 (__v8sf)
12434 _mm256_setzero_ps (),
12435 (__mmask8) __U,
12436 _MM_FROUND_CUR_DIRECTION);
12437}
12438
12439#ifdef __OPTIMIZE__
12440extern __inline __m512
12441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12442_mm512_getexp_ps (__m512 __A)
12443{
12444 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12445 (__v16sf)
0b192937 12446 _mm512_undefined_ps (),
756c5857
AI
12447 (__mmask16) -1,
12448 _MM_FROUND_CUR_DIRECTION);
12449}
12450
12451extern __inline __m512
12452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12453_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12454{
12455 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12456 (__v16sf) __W,
12457 (__mmask16) __U,
12458 _MM_FROUND_CUR_DIRECTION);
12459}
12460
12461extern __inline __m512
12462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12463_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12464{
12465 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12466 (__v16sf)
12467 _mm512_setzero_ps (),
12468 (__mmask16) __U,
12469 _MM_FROUND_CUR_DIRECTION);
12470}
12471
12472extern __inline __m512d
12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474_mm512_getexp_pd (__m512d __A)
12475{
12476 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12477 (__v8df)
0b192937 12478 _mm512_undefined_pd (),
756c5857
AI
12479 (__mmask8) -1,
12480 _MM_FROUND_CUR_DIRECTION);
12481}
12482
12483extern __inline __m512d
12484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12485_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12486{
12487 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12488 (__v8df) __W,
12489 (__mmask8) __U,
12490 _MM_FROUND_CUR_DIRECTION);
12491}
12492
12493extern __inline __m512d
12494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12495_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12496{
12497 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12498 (__v8df)
12499 _mm512_setzero_pd (),
12500 (__mmask8) __U,
12501 _MM_FROUND_CUR_DIRECTION);
12502}
12503
075691af
AI
12504extern __inline __m128
12505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12506_mm_getexp_ss (__m128 __A, __m128 __B)
12507{
12508 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12509 (__v4sf) __B,
12510 _MM_FROUND_CUR_DIRECTION);
12511}
12512
12513extern __inline __m128d
12514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12515_mm_getexp_sd (__m128d __A, __m128d __B)
12516{
12517 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12518 (__v2df) __B,
12519 _MM_FROUND_CUR_DIRECTION);
12520}
12521
756c5857
AI
12522extern __inline __m512d
12523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12524_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12525 _MM_MANTISSA_SIGN_ENUM __C)
12526{
12527 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12528 (__C << 2) | __B,
0b192937 12529 _mm512_undefined_pd (),
756c5857
AI
12530 (__mmask8) -1,
12531 _MM_FROUND_CUR_DIRECTION);
12532}
12533
12534extern __inline __m512d
12535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12536_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12537 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12538{
12539 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12540 (__C << 2) | __B,
12541 (__v8df) __W, __U,
12542 _MM_FROUND_CUR_DIRECTION);
12543}
12544
12545extern __inline __m512d
12546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12547_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12548 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12549{
12550 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12551 (__C << 2) | __B,
12552 (__v8df)
12553 _mm512_setzero_pd (),
12554 __U,
12555 _MM_FROUND_CUR_DIRECTION);
12556}
12557
12558extern __inline __m512
12559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12560_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12561 _MM_MANTISSA_SIGN_ENUM __C)
12562{
12563 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12564 (__C << 2) | __B,
0b192937 12565 _mm512_undefined_ps (),
756c5857
AI
12566 (__mmask16) -1,
12567 _MM_FROUND_CUR_DIRECTION);
12568}
12569
12570extern __inline __m512
12571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12572_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12573 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12574{
12575 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12576 (__C << 2) | __B,
12577 (__v16sf) __W, __U,
12578 _MM_FROUND_CUR_DIRECTION);
12579}
12580
12581extern __inline __m512
12582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12583_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12584 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12585{
12586 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12587 (__C << 2) | __B,
12588 (__v16sf)
12589 _mm512_setzero_ps (),
12590 __U,
12591 _MM_FROUND_CUR_DIRECTION);
12592}
12593
075691af
AI
12594extern __inline __m128d
12595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12596_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12597 _MM_MANTISSA_SIGN_ENUM __D)
12598{
12599 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12600 (__v2df) __B,
12601 (__D << 2) | __C,
12602 _MM_FROUND_CUR_DIRECTION);
12603}
12604
12605extern __inline __m128
12606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12607_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12608 _MM_MANTISSA_SIGN_ENUM __D)
12609{
12610 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12611 (__v4sf) __B,
12612 (__D << 2) | __C,
12613 _MM_FROUND_CUR_DIRECTION);
12614}
12615
756c5857
AI
12616#else
/* Macro forms of the 512-bit double getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  The immediate is ((C) << 2) | (B).  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit float getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  The immediate is ((C) << 2) | (B).  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__ is
   not defined.  The immediate is ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     _MM_FROUND_CUR_DIRECTION))
12668
/* Macro form of _mm_getexp_ss, used when __OPTIMIZE__ is not defined.
   It calls the same three-operand _round builtin as the inline definition
   (two vectors plus the rounding selector); the previous _mask spelling did
   not match that signature.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), \
                                              _MM_FROUND_CUR_DIRECTION))
12672
/* Macro form of _mm_getexp_sd, used when __OPTIMIZE__ is not defined.
   It calls the same three-operand _round builtin as the inline definition
   (two vectors plus the rounding selector); the previous _mask spelling did
   not match that signature.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), \
                                               _MM_FROUND_CUR_DIRECTION))
12676
756c5857
AI
/* Macro forms of the 512-bit float getexp intrinsics, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf) (__m512) (A), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     _MM_FROUND_CUR_DIRECTION))
12688
/* Macro forms of the 512-bit double getexp intrinsics, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df) (__m512d) (A), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
12700#endif
12701
12702#ifdef __OPTIMIZE__
12703extern __inline __m512
12704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12705_mm512_roundscale_ps (__m512 __A, const int __imm)
12706{
12707 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
12708 (__v16sf)
12709 _mm512_undefined_ps (),
12710 -1,
756c5857
AI
12711 _MM_FROUND_CUR_DIRECTION);
12712}
12713
12714extern __inline __m512
12715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12716_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12717 const int __imm)
12718{
12719 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12720 (__v16sf) __A,
12721 (__mmask16) __B,
12722 _MM_FROUND_CUR_DIRECTION);
12723}
12724
12725extern __inline __m512
12726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12727_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12728{
12729 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12730 __imm,
12731 (__v16sf)
12732 _mm512_setzero_ps (),
12733 (__mmask16) __A,
12734 _MM_FROUND_CUR_DIRECTION);
12735}
12736
12737extern __inline __m512d
12738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12739_mm512_roundscale_pd (__m512d __A, const int __imm)
12740{
12741 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
12742 (__v8df)
12743 _mm512_undefined_pd (),
12744 -1,
756c5857
AI
12745 _MM_FROUND_CUR_DIRECTION);
12746}
12747
12748extern __inline __m512d
12749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12750_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12751 const int __imm)
12752{
12753 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12754 (__v8df) __A,
12755 (__mmask8) __B,
12756 _MM_FROUND_CUR_DIRECTION);
12757}
12758
12759extern __inline __m512d
12760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12761_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12762{
12763 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12764 __imm,
12765 (__v8df)
12766 _mm512_setzero_pd (),
12767 (__mmask8) __A,
12768 _MM_FROUND_CUR_DIRECTION);
12769}
12770
075691af
AI
12771extern __inline __m128
12772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12773_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12774{
12775 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12776 (__v4sf) __B, __imm,
12777 _MM_FROUND_CUR_DIRECTION);
12778}
12779
12780extern __inline __m128d
12781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12782_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12783{
12784 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12785 (__v2df) __B, __imm,
12786 _MM_FROUND_CUR_DIRECTION);
12787}
12788
756c5857
AI
12789#else
/* Macro forms of the 512-bit float roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (A), \
     (int) (B), \
     (__v16sf) _mm512_undefined_ps (), \
     (__mmask16) (-1), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (C), \
     (int) (D), \
     (__v16sf) (__m512) (A), \
     (__mmask16) (B), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf) (__m512) (B), \
     (int) (C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16) (A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit double roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (A), \
     (int) (B), \
     (__v8df) _mm512_undefined_pd (), \
     (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (C), \
     (int) (D), \
     (__v8df) (__m512d) (A), \
     (__mmask8) (B), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df) (__m512d) (B), \
     (int) (C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8) (A), \
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar roundscale intrinsics, used when __OPTIMIZE__
   is not defined.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     (int) (C), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     (int) (C), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
12822#endif
12823
12824#ifdef __OPTIMIZE__
12825extern __inline __mmask8
12826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12828{
12829 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12830 (__v8df) __Y, __P,
12831 (__mmask8) -1,
12832 _MM_FROUND_CUR_DIRECTION);
12833}
12834
12835extern __inline __mmask16
12836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12838{
12839 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12840 (__v16sf) __Y, __P,
12841 (__mmask16) -1,
12842 _MM_FROUND_CUR_DIRECTION);
12843}
12844
12845extern __inline __mmask16
12846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12847_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12848{
12849 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12850 (__v16sf) __Y, __P,
12851 (__mmask16) __U,
12852 _MM_FROUND_CUR_DIRECTION);
12853}
12854
12855extern __inline __mmask8
12856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12858{
12859 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12860 (__v8df) __Y, __P,
12861 (__mmask8) __U,
12862 _MM_FROUND_CUR_DIRECTION);
12863}
12864
12865extern __inline __mmask8
12866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12867_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12868{
12869 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12870 (__v2df) __Y, __P,
12871 (__mmask8) -1,
12872 _MM_FROUND_CUR_DIRECTION);
12873}
12874
12875extern __inline __mmask8
12876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12877_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12878{
12879 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12880 (__v2df) __Y, __P,
12881 (__mmask8) __M,
12882 _MM_FROUND_CUR_DIRECTION);
12883}
12884
12885extern __inline __mmask8
12886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12887_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12888{
12889 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12890 (__v4sf) __Y, __P,
12891 (__mmask8) -1,
12892 _MM_FROUND_CUR_DIRECTION);
12893}
12894
12895extern __inline __mmask8
12896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12897_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12898{
12899 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12900 (__v4sf) __Y, __P,
12901 (__mmask8) __M,
12902 _MM_FROUND_CUR_DIRECTION);
12903}
12904
12905#else
/* Macro forms of the unmasked 512-bit compares, used when __OPTIMIZE__ is
   not defined.  P is the predicate immediate; the input mask is all ones.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, \
     _MM_FROUND_CUR_DIRECTION))
12915
/* Macro form of _mm512_mask_cmp_pd_mask, used when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized before the cast so that an expression argument such as
   `a | b' is converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M), \
                                            _MM_FROUND_CUR_DIRECTION))
12920
/* Macro form of _mm512_mask_cmp_ps_mask, used when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized before the cast so that an expression argument such as
   `a | b' is converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) (M), \
                                             _MM_FROUND_CUR_DIRECTION))
12925
/* Macro form of _mm_cmp_sd_mask, used when __OPTIMIZE__ is not defined.
   P is the predicate immediate; the input mask is all ones.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
12930
/* Macro form of _mm_mask_cmp_sd_mask, used when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized and converted to __mmask8, as the inline definition does
   with its __M parameter, so any expression can be passed safely.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
12935
/* Macro form of _mm_cmp_ss_mask, used when __OPTIMIZE__ is not defined.
   P is the predicate immediate; the input mask is all ones.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     _MM_FROUND_CUR_DIRECTION))
12940
/* Macro form of _mm_mask_cmp_ss_mask, used when __OPTIMIZE__ is not
   defined.  M is the input mask and P the predicate immediate.  M is
   parenthesized and converted to __mmask8, as the inline definition does
   with its __M parameter, so any expression can be passed safely.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
12945#endif
12946
2196a885
KY
12947extern __inline __mmask16
12948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12949_mm512_kmov (__mmask16 __A)
12950{
12951 return __builtin_ia32_kmov16 (__A);
12952}
12953
275be1da
IT
12954extern __inline __m512
12955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12956_mm512_castpd_ps (__m512d __A)
12957{
12958 return (__m512) (__A);
12959}
12960
12961extern __inline __m512i
12962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12963_mm512_castpd_si512 (__m512d __A)
12964{
12965 return (__m512i) (__A);
12966}
12967
12968extern __inline __m512d
12969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12970_mm512_castps_pd (__m512 __A)
12971{
12972 return (__m512d) (__A);
12973}
12974
12975extern __inline __m512i
12976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12977_mm512_castps_si512 (__m512 __A)
12978{
12979 return (__m512i) (__A);
12980}
12981
12982extern __inline __m512
12983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12984_mm512_castsi512_ps (__m512i __A)
12985{
12986 return (__m512) (__A);
12987}
12988
12989extern __inline __m512d
12990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12991_mm512_castsi512_pd (__m512i __A)
12992{
12993 return (__m512d) (__A);
12994}
12995
12996extern __inline __m128d
12997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12998_mm512_castpd512_pd128 (__m512d __A)
12999{
13000 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13001}
13002
13003extern __inline __m128
13004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13005_mm512_castps512_ps128 (__m512 __A)
13006{
13007 return _mm512_extractf32x4_ps(__A, 0);
13008}
13009
13010extern __inline __m128i
13011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13012_mm512_castsi512_si128 (__m512i __A)
13013{
13014 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13015}
13016
13017extern __inline __m256d
13018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13019_mm512_castpd512_pd256 (__m512d __A)
13020{
13021 return _mm512_extractf64x4_pd(__A, 0);
13022}
13023
13024extern __inline __m256
13025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13026_mm512_castps512_ps256 (__m512 __A)
13027{
13028 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13029}
13030
13031extern __inline __m256i
13032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13033_mm512_castsi512_si256 (__m512i __A)
13034{
13035 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13036}
13037
13038extern __inline __m512d
13039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13040_mm512_castpd128_pd512 (__m128d __A)
13041{
13042 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13043}
13044
13045extern __inline __m512
13046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13047_mm512_castps128_ps512 (__m128 __A)
13048{
13049 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13050}
13051
13052extern __inline __m512i
13053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13054_mm512_castsi128_si512 (__m128i __A)
13055{
13056 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13057}
13058
13059extern __inline __m512d
13060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13061_mm512_castpd256_pd512 (__m256d __A)
13062{
13063 return __builtin_ia32_pd512_256pd (__A);
13064}
13065
13066extern __inline __m512
13067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13068_mm512_castps256_ps512 (__m256 __A)
13069{
13070 return __builtin_ia32_ps512_256ps (__A);
13071}
13072
13073extern __inline __m512i
13074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13075_mm512_castsi256_si512 (__m256i __A)
13076{
13077 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13078}
13079
13080extern __inline __mmask16
13081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13082_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13083{
13084 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13085 (__v16si) __B, 0,
13086 (__mmask16) -1);
13087}
13088
13089extern __inline __mmask16
13090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13091_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13092{
13093 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13094 (__v16si) __B, 0, __U);
13095}
13096
13097extern __inline __mmask8
13098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13099_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13100{
13101 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13102 (__v8di) __B, 0, __U);
13103}
13104
13105extern __inline __mmask8
13106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13107_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13108{
13109 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13110 (__v8di) __B, 0,
13111 (__mmask8) -1);
13112}
13113
13114extern __inline __mmask16
13115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13116_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13117{
13118 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13119 (__v16si) __B, 6,
13120 (__mmask16) -1);
13121}
13122
13123extern __inline __mmask16
13124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13126{
13127 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13128 (__v16si) __B, 6, __U);
13129}
13130
13131extern __inline __mmask8
13132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13134{
13135 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13136 (__v8di) __B, 6, __U);
13137}
13138
13139extern __inline __mmask8
13140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13141_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13142{
13143 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13144 (__v8di) __B, 6,
13145 (__mmask8) -1);
13146}
13147
756c5857
AI
13148#ifdef __DISABLE_AVX512F__
13149#undef __DISABLE_AVX512F__
13150#pragma GCC pop_options
13151#endif /* __DISABLE_AVX512F__ */
13152
13153#endif /* _AVX512FINTRIN_H_INCLUDED */