]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
Daily bump.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
/* Copyright (C) 2013-2017 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal data types for implementing the intrinsics.  Every type is a
   64-byte GCC vector; only the element type differs.  */

/* Floating-point element types.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* 64-bit integer elements, signed then unsigned.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));

/* 32-bit integer elements, signed then unsigned.  */
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));

/* 16-bit integer elements, signed then unsigned.  */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));

/* Byte elements: plain char, then unsigned char.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  Hence __may_alias__ on each
   of the three 64-byte types below.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Unaligned versions of the same types: identical element layout and
   aliasing rules, but declared with __aligned__ (1).  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Mask types used by the intrinsics in this file: an unsigned char for
   __mmask8 and an unsigned short for __mmask16.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
100extern __inline __m512d
101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
104{
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
107}
108
109extern __inline __m512
110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111_mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
115{
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
119}
120
/* "Reversed" setters.  Each macro forwards its arguments to the matching
   _mm512_set_* function in the opposite order, so e0 is passed as that
   function's last argument.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)                           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                           \
			  e8,e9,e10,e11,e12,e13,e14,e15)                     \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),            \
		   (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)                              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),               \
		(e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
133
0b192937
UD
134extern __inline __m512
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm512_undefined_ps (void)
137{
138 __m512 __Y = __Y;
139 return __Y;
140}
141
dcb2c527
JJ
142#define _mm512_undefined _mm512_undefined_ps
143
0b192937
UD
144extern __inline __m512d
145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146_mm512_undefined_pd (void)
147{
148 __m512d __Y = __Y;
149 return __Y;
150}
151
152extern __inline __m512i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 154_mm512_undefined_epi32 (void)
0b192937
UD
155{
156 __m512i __Y = __Y;
157 return __Y;
158}
159
4271e5cb
UB
160#define _mm512_undefined_si512 _mm512_undefined_epi32
161
7d9088c2
UD
162extern __inline __m512i
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm512_set1_epi8 (char __A)
165{
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
175}
176
177extern __inline __m512i
178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179_mm512_set1_epi16 (short __A)
180{
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
186}
187
2b2384e8
UD
188extern __inline __m512d
189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190_mm512_set1_pd (double __A)
191{
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
197}
198
199extern __inline __m512
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm512_set1_ps (float __A)
202{
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
208}
209
7d9088c2
UD
210/* Create the vector [A B C D A B C D A B C D A B C D]. */
211extern __inline __m512i
212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214{
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
218}
219
220extern __inline __m512i
221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222_mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
224{
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
227}
228
229extern __inline __m512d
230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231_mm512_set4_pd (double __A, double __B, double __C, double __D)
232{
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
235}
236
237extern __inline __m512
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm512_set4_ps (float __A, float __B, float __C, float __D)
240{
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
244}
245
/* "Reversed" four-element setters.  Each macro forwards its arguments to
   the matching _mm512_set4_* function in the opposite order.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)                                       \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi32(e0,e1,e2,e3)                                       \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                                          \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                                          \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
257
756c5857
AI
258extern __inline __m512
259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260_mm512_setzero_ps (void)
261{
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264}
265
266extern __inline __m512d
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm512_setzero_pd (void)
269{
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271}
272
7d9088c2
UD
273extern __inline __m512i
274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275_mm512_setzero_epi32 (void)
276{
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278}
279
756c5857
AI
280extern __inline __m512i
281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282_mm512_setzero_si512 (void)
283{
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285}
286
287extern __inline __m512d
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290{
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
294}
295
296extern __inline __m512d
297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299{
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
304}
305
306extern __inline __m512
307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309{
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
313}
314
315extern __inline __m512
316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318{
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
323}
324
325extern __inline __m512d
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_load_pd (void const *__P)
328{
329 return *(__m512d *) __P;
330}
331
332extern __inline __m512d
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335{
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
339}
340
341extern __inline __m512d
342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344{
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
349}
350
351extern __inline void
352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353_mm512_store_pd (void *__P, __m512d __A)
354{
355 *(__m512d *) __P = __A;
356}
357
358extern __inline void
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361{
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
364}
365
366extern __inline __m512
367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368_mm512_load_ps (void const *__P)
369{
370 return *(__m512 *) __P;
371}
372
373extern __inline __m512
374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376{
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
380}
381
382extern __inline __m512
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385{
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
390}
391
392extern __inline void
393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394_mm512_store_ps (void *__P, __m512 __A)
395{
396 *(__m512 *) __P = __A;
397}
398
399extern __inline void
400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402{
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
405}
406
407extern __inline __m512i
408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410{
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
414}
415
416extern __inline __m512i
417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419{
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
424}
425
426extern __inline __m512i
427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428_mm512_load_epi64 (void const *__P)
429{
430 return *(__m512i *) __P;
431}
432
433extern __inline __m512i
434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436{
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
440}
441
442extern __inline __m512i
443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445{
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
450}
451
452extern __inline void
453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454_mm512_store_epi64 (void *__P, __m512i __A)
455{
456 *(__m512i *) __P = __A;
457}
458
459extern __inline void
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462{
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
465}
466
467extern __inline __m512i
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470{
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
474}
475
476extern __inline __m512i
477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479{
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
484}
485
486extern __inline __m512i
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_load_si512 (void const *__P)
489{
490 return *(__m512i *) __P;
491}
492
493extern __inline __m512i
494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495_mm512_load_epi32 (void const *__P)
496{
497 return *(__m512i *) __P;
498}
499
500extern __inline __m512i
501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503{
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
507}
508
509extern __inline __m512i
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512{
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
517}
518
519extern __inline void
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm512_store_si512 (void *__P, __m512i __A)
522{
523 *(__m512i *) __P = __A;
524}
525
526extern __inline void
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm512_store_epi32 (void *__P, __m512i __A)
529{
530 *(__m512i *) __P = __A;
531}
532
533extern __inline void
534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536{
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
539}
540
541extern __inline __m512i
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm512_mullo_epi32 (__m512i __A, __m512i __B)
544{
2069d6fc 545 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
546}
547
548extern __inline __m512i
549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551{
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
557}
558
559extern __inline __m512i
560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562{
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
566}
567
568extern __inline __m512i
569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571{
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
4271e5cb 575 _mm512_undefined_epi32 (),
756c5857
AI
576 (__mmask16) -1);
577}
578
579extern __inline __m512i
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582{
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
587}
588
589extern __inline __m512i
590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592{
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
598}
599
600extern __inline __m512i
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm512_srav_epi32 (__m512i __X, __m512i __Y)
603{
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
4271e5cb 607 _mm512_undefined_epi32 (),
756c5857
AI
608 (__mmask16) -1);
609}
610
611extern __inline __m512i
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614{
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
619}
620
621extern __inline __m512i
622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624{
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
630}
631
632extern __inline __m512i
633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635{
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
4271e5cb 639 _mm512_undefined_epi32 (),
756c5857
AI
640 (__mmask16) -1);
641}
642
643extern __inline __m512i
644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646{
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
651}
652
653extern __inline __m512i
654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656{
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
662}
663
664extern __inline __m512i
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm512_add_epi64 (__m512i __A, __m512i __B)
667{
2069d6fc 668 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
669}
670
671extern __inline __m512i
672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674{
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
679}
680
681extern __inline __m512i
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684{
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
690}
691
692extern __inline __m512i
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm512_sub_epi64 (__m512i __A, __m512i __B)
695{
2069d6fc 696 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
697}
698
699extern __inline __m512i
700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712{
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
718}
719
720extern __inline __m512i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723{
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
0b192937 727 _mm512_undefined_pd (),
756c5857
AI
728 (__mmask8) -1);
729}
730
731extern __inline __m512i
732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734{
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
739}
740
741extern __inline __m512i
742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744{
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
750}
751
752extern __inline __m512i
753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754_mm512_srav_epi64 (__m512i __X, __m512i __Y)
755{
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
4271e5cb 759 _mm512_undefined_epi32 (),
756c5857
AI
760 (__mmask8) -1);
761}
762
763extern __inline __m512i
764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766{
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
771}
772
773extern __inline __m512i
774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776{
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
782}
783
784extern __inline __m512i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787{
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
4271e5cb 791 _mm512_undefined_epi32 (),
756c5857
AI
792 (__mmask8) -1);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798{
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
803}
804
805extern __inline __m512i
806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808{
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
814}
815
816extern __inline __m512i
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm512_add_epi32 (__m512i __A, __m512i __B)
819{
2069d6fc 820 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
821}
822
823extern __inline __m512i
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826{
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
831}
832
833extern __inline __m512i
834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836{
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
842}
843
844extern __inline __m512i
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm512_mul_epi32 (__m512i __X, __m512i __Y)
847{
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
4271e5cb 851 _mm512_undefined_epi32 (),
756c5857
AI
852 (__mmask8) -1);
853}
854
855extern __inline __m512i
856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858{
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
862}
863
864extern __inline __m512i
865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867{
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
873}
874
875extern __inline __m512i
876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877_mm512_sub_epi32 (__m512i __A, __m512i __B)
878{
2069d6fc 879 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
880}
881
882extern __inline __m512i
883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885{
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
890}
891
892extern __inline __m512i
893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895{
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
901}
902
903extern __inline __m512i
904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905_mm512_mul_epu32 (__m512i __X, __m512i __Y)
906{
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
4271e5cb 910 _mm512_undefined_epi32 (),
756c5857
AI
911 (__mmask8) -1);
912}
913
914extern __inline __m512i
915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917{
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
921}
922
923extern __inline __m512i
924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926{
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
932}
933
934#ifdef __OPTIMIZE__
935extern __inline __m512i
936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937_mm512_slli_epi64 (__m512i __A, unsigned int __B)
938{
939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 (__v8di)
4271e5cb 941 _mm512_undefined_epi32 (),
756c5857
AI
942 (__mmask8) -1);
943}
944
945extern __inline __m512i
946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948 unsigned int __B)
949{
950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951 (__v8di) __W,
952 (__mmask8) __U);
953}
954
955extern __inline __m512i
956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
958{
959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960 (__v8di)
961 _mm512_setzero_si512 (),
962 (__mmask8) __U);
963}
964#else
/* Macro forms of the _mm512_*slli_epi64 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to a direct call of
   __builtin_ia32_psllqi512_mask with the count cast to int.  */
#define _mm512_slli_epi64(X, C)                                               \
  ((__m512i) __builtin_ia32_psllqi512_mask (                                  \
     (__v8di)(__m512i)(X), (int)(C),                                          \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),                              \
     (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)                                    \
  ((__m512i) __builtin_ia32_psllqi512_mask (                                  \
     (__v8di)(__m512i)(X), (int)(C),                                          \
     (__v8di)(__m512i)(W),                                                    \
     (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)                                      \
  ((__m512i) __builtin_ia32_psllqi512_mask (                                  \
     (__v8di)(__m512i)(X), (int)(C),                                          \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                                \
     (__mmask8)(U)))
979#endif
980
981extern __inline __m512i
982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983_mm512_sll_epi64 (__m512i __A, __m128i __B)
984{
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
4271e5cb 988 _mm512_undefined_epi32 (),
756c5857
AI
989 (__mmask8) -1);
990}
991
992extern __inline __m512i
993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995{
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1000}
1001
1002extern __inline __m512i
1003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005{
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1011}
1012
1013#ifdef __OPTIMIZE__
1014extern __inline __m512i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm512_srli_epi64 (__m512i __A, unsigned int __B)
1017{
1018 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019 (__v8di)
4271e5cb 1020 _mm512_undefined_epi32 (),
756c5857
AI
1021 (__mmask8) -1);
1022}
1023
1024extern __inline __m512i
1025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1027 __m512i __A, unsigned int __B)
1028{
1029 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1030 (__v8di) __W,
1031 (__mmask8) __U);
1032}
1033
1034extern __inline __m512i
1035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1037{
1038 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1039 (__v8di)
1040 _mm512_setzero_si512 (),
1041 (__mmask8) __U);
1042}
1043#else
/* Macro forms of the psrlqi512 wrappers, used when __OPTIMIZE__ is not
   defined.  */

/* All-ones write mask, undefined merge source.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))

/* W is the merge source, U the write mask.  */
#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))

/* All-zero merge source, U the write mask.  */
#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
1058#endif
1059
1060extern __inline __m512i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm512_srl_epi64 (__m512i __A, __m128i __B)
1063{
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
4271e5cb 1067 _mm512_undefined_epi32 (),
756c5857
AI
1068 (__mmask8) -1);
1069}
1070
1071extern __inline __m512i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074{
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1079}
1080
1081extern __inline __m512i
1082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084{
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1090}
1091
1092#ifdef __OPTIMIZE__
1093extern __inline __m512i
1094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095_mm512_srai_epi64 (__m512i __A, unsigned int __B)
1096{
1097 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098 (__v8di)
4271e5cb 1099 _mm512_undefined_epi32 (),
756c5857
AI
1100 (__mmask8) -1);
1101}
1102
1103extern __inline __m512i
1104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1106 unsigned int __B)
1107{
1108 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1109 (__v8di) __W,
1110 (__mmask8) __U);
1111}
1112
1113extern __inline __m512i
1114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1115_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1116{
1117 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1118 (__v8di)
1119 _mm512_setzero_si512 (),
1120 (__mmask8) __U);
1121}
1122#else
/* Macro forms of the psraqi512 wrappers, used when __OPTIMIZE__ is not
   defined.  */

/* All-ones write mask, undefined merge source.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask8)-1))

/* W is the merge source, U the write mask.  */
#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))

/* All-zero merge source, U the write mask.  */
#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(C), \
     (__v8di)(__m512i)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
1137#endif
1138
1139extern __inline __m512i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm512_sra_epi64 (__m512i __A, __m128i __B)
1142{
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
4271e5cb 1146 _mm512_undefined_epi32 (),
756c5857
AI
1147 (__mmask8) -1);
1148}
1149
1150extern __inline __m512i
1151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153{
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1158}
1159
1160extern __inline __m512i
1161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163{
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1169}
1170
1171#ifdef __OPTIMIZE__
1172extern __inline __m512i
1173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1175{
1176 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177 (__v16si)
4271e5cb 1178 _mm512_undefined_epi32 (),
756c5857
AI
1179 (__mmask16) -1);
1180}
1181
1182extern __inline __m512i
1183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1184_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1185 unsigned int __B)
1186{
1187 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1188 (__v16si) __W,
1189 (__mmask16) __U);
1190}
1191
1192extern __inline __m512i
1193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1195{
1196 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1197 (__v16si)
1198 _mm512_setzero_si512 (),
1199 (__mmask16) __U);
1200}
1201#else
/* Macro forms of the pslldi512 wrappers, used when __OPTIMIZE__ is not
   defined.  */

/* All-ones write mask, undefined merge source.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))

/* W is the merge source, U the write mask.  */
#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

/* All-zero merge source, U the write mask.  */
#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1216#endif
1217
1218extern __inline __m512i
1219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220_mm512_sll_epi32 (__m512i __A, __m128i __B)
1221{
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
4271e5cb 1225 _mm512_undefined_epi32 (),
756c5857
AI
1226 (__mmask16) -1);
1227}
1228
1229extern __inline __m512i
1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232{
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1237}
1238
1239extern __inline __m512i
1240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242{
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1248}
1249
1250#ifdef __OPTIMIZE__
1251extern __inline __m512i
1252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1253_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1254{
1255 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256 (__v16si)
4271e5cb 1257 _mm512_undefined_epi32 (),
756c5857
AI
1258 (__mmask16) -1);
1259}
1260
1261extern __inline __m512i
1262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1264 __m512i __A, unsigned int __B)
1265{
1266 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1267 (__v16si) __W,
1268 (__mmask16) __U);
1269}
1270
1271extern __inline __m512i
1272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1274{
1275 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1276 (__v16si)
1277 _mm512_setzero_si512 (),
1278 (__mmask16) __U);
1279}
1280#else
/* Macro forms of the psrldi512 wrappers, used when __OPTIMIZE__ is not
   defined.  */

/* All-ones write mask, undefined merge source.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))

/* W is the merge source, U the write mask.  */
#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

/* All-zero merge source, U the write mask.  */
#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1295#endif
1296
1297extern __inline __m512i
1298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299_mm512_srl_epi32 (__m512i __A, __m128i __B)
1300{
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
4271e5cb 1304 _mm512_undefined_epi32 (),
756c5857
AI
1305 (__mmask16) -1);
1306}
1307
1308extern __inline __m512i
1309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311{
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1316}
1317
1318extern __inline __m512i
1319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321{
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1327}
1328
1329#ifdef __OPTIMIZE__
1330extern __inline __m512i
1331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1333{
1334 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335 (__v16si)
4271e5cb 1336 _mm512_undefined_epi32 (),
756c5857
AI
1337 (__mmask16) -1);
1338}
1339
1340extern __inline __m512i
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1343 unsigned int __B)
1344{
1345 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1346 (__v16si) __W,
1347 (__mmask16) __U);
1348}
1349
1350extern __inline __m512i
1351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1353{
1354 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1355 (__v16si)
1356 _mm512_setzero_si512 (),
1357 (__mmask16) __U);
1358}
1359#else
/* Macro forms of the psradi512 wrappers, used when __OPTIMIZE__ is not
   defined.  */

/* All-ones write mask, undefined merge source.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_undefined_epi32 (), \
     (__mmask16)-1))

/* W is the merge source, U the write mask.  */
#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

/* All-zero merge source, U the write mask.  */
#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si)(__m512i)(X), \
     (int)(C), \
     (__v16si)(__m512i)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
1374#endif
1375
1376extern __inline __m512i
1377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378_mm512_sra_epi32 (__m512i __A, __m128i __B)
1379{
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
4271e5cb 1383 _mm512_undefined_epi32 (),
756c5857
AI
1384 (__mmask16) -1);
1385}
1386
1387extern __inline __m512i
1388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390{
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1395}
1396
1397extern __inline __m512i
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400{
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1406}
1407
075691af
AI
1408#ifdef __OPTIMIZE__
1409extern __inline __m128d
1410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1412{
1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 __R);
1416}
1417
1418extern __inline __m128
1419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1421{
1422 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1423 (__v4sf) __B,
1424 __R);
1425}
1426
1427extern __inline __m128d
1428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1430{
1431 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1432 (__v2df) __B,
1433 __R);
1434}
1435
1436extern __inline __m128
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1439{
1440 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1441 (__v4sf) __B,
1442 __R);
1443}
1444
1445#else
/* Macro forms of the scalar add/sub rounding wrappers, used when
   __OPTIMIZE__ is not defined.  Each expansion and each argument is
   parenthesized so that the macro composes like the inline function of
   the same name (e.g. a trailing subscript applies to the cast result,
   not to the builtin call).  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((A), (B), (C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((A), (B), (C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((A), (B), (C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((A), (B), (C)))
1457#endif
1458
756c5857
AI
1459#ifdef __OPTIMIZE__
1460extern __inline __m512i
1461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1462_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1463 const int __imm)
756c5857
AI
1464{
1465 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1466 (__v8di) __B,
b5fd0b71 1467 (__v8di) __C, __imm,
756c5857
AI
1468 (__mmask8) -1);
1469}
1470
1471extern __inline __m512i
1472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
b5fd0b71 1474 __m512i __C, const int __imm)
756c5857
AI
1475{
1476 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1477 (__v8di) __B,
b5fd0b71 1478 (__v8di) __C, __imm,
756c5857
AI
1479 (__mmask8) __U);
1480}
1481
1482extern __inline __m512i
1483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
b5fd0b71 1485 __m512i __C, const int __imm)
756c5857
AI
1486{
1487 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1488 (__v8di) __B,
1489 (__v8di) __C,
b5fd0b71 1490 __imm, (__mmask8) __U);
756c5857
AI
1491}
1492
1493extern __inline __m512i
1494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1495_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1496 const int __imm)
756c5857
AI
1497{
1498 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1499 (__v16si) __B,
1500 (__v16si) __C,
b5fd0b71 1501 __imm, (__mmask16) -1);
756c5857
AI
1502}
1503
1504extern __inline __m512i
1505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1506_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
b5fd0b71 1507 __m512i __C, const int __imm)
756c5857
AI
1508{
1509 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1510 (__v16si) __B,
1511 (__v16si) __C,
b5fd0b71 1512 __imm, (__mmask16) __U);
756c5857
AI
1513}
1514
1515extern __inline __m512i
1516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1517_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
b5fd0b71 1518 __m512i __C, const int __imm)
756c5857
AI
1519{
1520 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1521 (__v16si) __B,
1522 (__v16si) __C,
b5fd0b71 1523 __imm, (__mmask16) __U);
756c5857
AI
1524}
1525#else
/* Macro forms of the ternary-logic wrappers, used when __OPTIMIZE__ is
   not defined.  The unmasked and mask variants share the *_mask builtin;
   the maskz variants use the *_maskz builtin.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (__v8di)(__m512i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (__v8di)(__m512i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (__v8di)(__m512i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (__v16si)(__m512i)(C), \
     (int)(I), \
     (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (__v16si)(__m512i)(C), \
     (int)(I), \
     (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (__v16si)(__m512i)(C), \
     (int)(I), \
     (__mmask16)(U)))
1547#endif
1548
1549extern __inline __m512d
1550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1551_mm512_rcp14_pd (__m512d __A)
1552{
1553 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1554 (__v8df)
0b192937 1555 _mm512_undefined_pd (),
756c5857
AI
1556 (__mmask8) -1);
1557}
1558
1559extern __inline __m512d
1560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1562{
1563 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1564 (__v8df) __W,
1565 (__mmask8) __U);
1566}
1567
1568extern __inline __m512d
1569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1571{
1572 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1573 (__v8df)
1574 _mm512_setzero_pd (),
1575 (__mmask8) __U);
1576}
1577
1578extern __inline __m512
1579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1580_mm512_rcp14_ps (__m512 __A)
1581{
1582 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1583 (__v16sf)
0b192937 1584 _mm512_undefined_ps (),
756c5857
AI
1585 (__mmask16) -1);
1586}
1587
1588extern __inline __m512
1589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1591{
1592 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1593 (__v16sf) __W,
1594 (__mmask16) __U);
1595}
1596
1597extern __inline __m512
1598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1599_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1600{
1601 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1602 (__v16sf)
1603 _mm512_setzero_ps (),
1604 (__mmask16) __U);
1605}
1606
075691af
AI
1607extern __inline __m128d
1608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609_mm_rcp14_sd (__m128d __A, __m128d __B)
1610{
df62b4af
IT
1611 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1612 (__v2df) __A);
075691af
AI
1613}
1614
1615extern __inline __m128
1616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617_mm_rcp14_ss (__m128 __A, __m128 __B)
1618{
df62b4af
IT
1619 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1620 (__v4sf) __A);
075691af
AI
1621}
1622
756c5857
AI
1623extern __inline __m512d
1624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1625_mm512_rsqrt14_pd (__m512d __A)
1626{
1627 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1628 (__v8df)
0b192937 1629 _mm512_undefined_pd (),
756c5857
AI
1630 (__mmask8) -1);
1631}
1632
1633extern __inline __m512d
1634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1635_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1636{
1637 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1638 (__v8df) __W,
1639 (__mmask8) __U);
1640}
1641
1642extern __inline __m512d
1643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1644_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1645{
1646 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1647 (__v8df)
1648 _mm512_setzero_pd (),
1649 (__mmask8) __U);
1650}
1651
1652extern __inline __m512
1653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1654_mm512_rsqrt14_ps (__m512 __A)
1655{
1656 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1657 (__v16sf)
0b192937 1658 _mm512_undefined_ps (),
756c5857
AI
1659 (__mmask16) -1);
1660}
1661
1662extern __inline __m512
1663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1665{
1666 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1667 (__v16sf) __W,
1668 (__mmask16) __U);
1669}
1670
1671extern __inline __m512
1672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1673_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1674{
1675 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1676 (__v16sf)
1677 _mm512_setzero_ps (),
1678 (__mmask16) __U);
1679}
1680
075691af
AI
1681extern __inline __m128d
1682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1684{
df62b4af
IT
1685 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1686 (__v2df) __A);
075691af
AI
1687}
1688
1689extern __inline __m128
1690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1691_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1692{
df62b4af
IT
1693 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1694 (__v4sf) __A);
075691af
AI
1695}
1696
756c5857
AI
1697#ifdef __OPTIMIZE__
1698extern __inline __m512d
1699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1700_mm512_sqrt_round_pd (__m512d __A, const int __R)
1701{
1702 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1703 (__v8df)
0b192937 1704 _mm512_undefined_pd (),
756c5857
AI
1705 (__mmask8) -1, __R);
1706}
1707
1708extern __inline __m512d
1709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1710_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1711 const int __R)
1712{
1713 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1714 (__v8df) __W,
1715 (__mmask8) __U, __R);
1716}
1717
1718extern __inline __m512d
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1721{
1722 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1723 (__v8df)
1724 _mm512_setzero_pd (),
1725 (__mmask8) __U, __R);
1726}
1727
1728extern __inline __m512
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm512_sqrt_round_ps (__m512 __A, const int __R)
1731{
1732 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1733 (__v16sf)
0b192937 1734 _mm512_undefined_ps (),
756c5857
AI
1735 (__mmask16) -1, __R);
1736}
1737
1738extern __inline __m512
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1741{
1742 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1743 (__v16sf) __W,
1744 (__mmask16) __U, __R);
1745}
1746
1747extern __inline __m512
1748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1750{
1751 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1752 (__v16sf)
1753 _mm512_setzero_ps (),
1754 (__mmask16) __U, __R);
1755}
1756
075691af
AI
1757extern __inline __m128d
1758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1759_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1760{
1761 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1762 (__v2df) __A,
1763 __R);
1764}
1765
1766extern __inline __m128
1767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1769{
1770 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1771 (__v4sf) __A,
1772 __R);
1773}
756c5857
AI
1774#else
/* Macro forms of the sqrt rounding wrappers, used when __OPTIMIZE__ is
   not defined.  Every expansion and every argument is parenthesized so
   the macros compose like the inline functions of the same name.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((A), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), (W), (U), (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((A), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))

/* The scalar builtins take B first and A second, exactly as the inline
   functions _mm_sqrt_round_sd/_mm_sqrt_round_ss pass them; the macros
   previously passed (A, B), giving different results between optimized
   and non-optimized builds.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((B), (A), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((B), (A), (C)))
756c5857
AI
1798#endif
1799
1800extern __inline __m512i
1801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1802_mm512_cvtepi8_epi32 (__m128i __A)
1803{
1804 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1805 (__v16si)
4271e5cb 1806 _mm512_undefined_epi32 (),
756c5857
AI
1807 (__mmask16) -1);
1808}
1809
1810extern __inline __m512i
1811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1812_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1813{
1814 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1815 (__v16si) __W,
1816 (__mmask16) __U);
1817}
1818
1819extern __inline __m512i
1820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1822{
1823 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1824 (__v16si)
1825 _mm512_setzero_si512 (),
1826 (__mmask16) __U);
1827}
1828
1829extern __inline __m512i
1830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1831_mm512_cvtepi8_epi64 (__m128i __A)
1832{
1833 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1834 (__v8di)
4271e5cb 1835 _mm512_undefined_epi32 (),
756c5857
AI
1836 (__mmask8) -1);
1837}
1838
1839extern __inline __m512i
1840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1841_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1842{
1843 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1844 (__v8di) __W,
1845 (__mmask8) __U);
1846}
1847
1848extern __inline __m512i
1849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1850_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1851{
1852 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1853 (__v8di)
1854 _mm512_setzero_si512 (),
1855 (__mmask8) __U);
1856}
1857
1858extern __inline __m512i
1859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1860_mm512_cvtepi16_epi32 (__m256i __A)
1861{
1862 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1863 (__v16si)
4271e5cb 1864 _mm512_undefined_epi32 (),
756c5857
AI
1865 (__mmask16) -1);
1866}
1867
1868extern __inline __m512i
1869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1870_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1871{
1872 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1873 (__v16si) __W,
1874 (__mmask16) __U);
1875}
1876
1877extern __inline __m512i
1878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1879_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1880{
1881 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1882 (__v16si)
1883 _mm512_setzero_si512 (),
1884 (__mmask16) __U);
1885}
1886
1887extern __inline __m512i
1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889_mm512_cvtepi16_epi64 (__m128i __A)
1890{
1891 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1892 (__v8di)
4271e5cb 1893 _mm512_undefined_epi32 (),
756c5857
AI
1894 (__mmask8) -1);
1895}
1896
1897extern __inline __m512i
1898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1900{
1901 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1902 (__v8di) __W,
1903 (__mmask8) __U);
1904}
1905
1906extern __inline __m512i
1907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1908_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1909{
1910 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1911 (__v8di)
1912 _mm512_setzero_si512 (),
1913 (__mmask8) __U);
1914}
1915
1916extern __inline __m512i
1917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1918_mm512_cvtepi32_epi64 (__m256i __X)
1919{
1920 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1921 (__v8di)
4271e5cb 1922 _mm512_undefined_epi32 (),
756c5857
AI
1923 (__mmask8) -1);
1924}
1925
1926extern __inline __m512i
1927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1928_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1929{
1930 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1931 (__v8di) __W,
1932 (__mmask8) __U);
1933}
1934
1935extern __inline __m512i
1936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1938{
1939 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1940 (__v8di)
1941 _mm512_setzero_si512 (),
1942 (__mmask8) __U);
1943}
1944
1945extern __inline __m512i
1946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947_mm512_cvtepu8_epi32 (__m128i __A)
1948{
1949 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1950 (__v16si)
4271e5cb 1951 _mm512_undefined_epi32 (),
756c5857
AI
1952 (__mmask16) -1);
1953}
1954
1955extern __inline __m512i
1956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1958{
1959 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1960 (__v16si) __W,
1961 (__mmask16) __U);
1962}
1963
1964extern __inline __m512i
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1967{
1968 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1969 (__v16si)
1970 _mm512_setzero_si512 (),
1971 (__mmask16) __U);
1972}
1973
1974extern __inline __m512i
1975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976_mm512_cvtepu8_epi64 (__m128i __A)
1977{
1978 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1979 (__v8di)
4271e5cb 1980 _mm512_undefined_epi32 (),
756c5857
AI
1981 (__mmask8) -1);
1982}
1983
1984extern __inline __m512i
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1987{
1988 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1989 (__v8di) __W,
1990 (__mmask8) __U);
1991}
1992
1993extern __inline __m512i
1994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1995_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1996{
1997 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1998 (__v8di)
1999 _mm512_setzero_si512 (),
2000 (__mmask8) __U);
2001}
2002
2003extern __inline __m512i
2004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005_mm512_cvtepu16_epi32 (__m256i __A)
2006{
2007 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2008 (__v16si)
4271e5cb 2009 _mm512_undefined_epi32 (),
756c5857
AI
2010 (__mmask16) -1);
2011}
2012
2013extern __inline __m512i
2014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2015_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2016{
2017 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2018 (__v16si) __W,
2019 (__mmask16) __U);
2020}
2021
2022extern __inline __m512i
2023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2024_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2025{
2026 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2027 (__v16si)
2028 _mm512_setzero_si512 (),
2029 (__mmask16) __U);
2030}
2031
2032extern __inline __m512i
2033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2034_mm512_cvtepu16_epi64 (__m128i __A)
2035{
2036 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2037 (__v8di)
4271e5cb 2038 _mm512_undefined_epi32 (),
756c5857
AI
2039 (__mmask8) -1);
2040}
2041
2042extern __inline __m512i
2043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2045{
2046 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2047 (__v8di) __W,
2048 (__mmask8) __U);
2049}
2050
2051extern __inline __m512i
2052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2053_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2054{
2055 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2056 (__v8di)
2057 _mm512_setzero_si512 (),
2058 (__mmask8) __U);
2059}
2060
2061extern __inline __m512i
2062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063_mm512_cvtepu32_epi64 (__m256i __X)
2064{
2065 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2066 (__v8di)
4271e5cb 2067 _mm512_undefined_epi32 (),
756c5857
AI
2068 (__mmask8) -1);
2069}
2070
2071extern __inline __m512i
2072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2074{
2075 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2076 (__v8di) __W,
2077 (__mmask8) __U);
2078}
2079
2080extern __inline __m512i
2081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2083{
2084 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2085 (__v8di)
2086 _mm512_setzero_si512 (),
2087 (__mmask8) __U);
2088}
2089
2090#ifdef __OPTIMIZE__
2091extern __inline __m512d
2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2094{
2095 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2096 (__v8df) __B,
2097 (__v8df)
0b192937 2098 _mm512_undefined_pd (),
756c5857
AI
2099 (__mmask8) -1, __R);
2100}
2101
2102extern __inline __m512d
2103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2104_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2105 __m512d __B, const int __R)
2106{
2107 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2108 (__v8df) __B,
2109 (__v8df) __W,
2110 (__mmask8) __U, __R);
2111}
2112
2113extern __inline __m512d
2114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2116 const int __R)
2117{
2118 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2119 (__v8df) __B,
2120 (__v8df)
2121 _mm512_setzero_pd (),
2122 (__mmask8) __U, __R);
2123}
2124
2125extern __inline __m512
2126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2127_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2128{
2129 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2130 (__v16sf) __B,
2131 (__v16sf)
0b192937 2132 _mm512_undefined_ps (),
756c5857
AI
2133 (__mmask16) -1, __R);
2134}
2135
2136extern __inline __m512
2137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2138_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2139 __m512 __B, const int __R)
2140{
2141 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2142 (__v16sf) __B,
2143 (__v16sf) __W,
2144 (__mmask16) __U, __R);
2145}
2146
2147extern __inline __m512
2148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2149_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2150{
2151 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2152 (__v16sf) __B,
2153 (__v16sf)
2154 _mm512_setzero_ps (),
2155 (__mmask16) __U, __R);
2156}
2157
2158extern __inline __m512d
2159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2161{
2162 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2163 (__v8df) __B,
2164 (__v8df)
0b192937 2165 _mm512_undefined_pd (),
756c5857
AI
2166 (__mmask8) -1, __R);
2167}
2168
2169extern __inline __m512d
2170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2172 __m512d __B, const int __R)
2173{
2174 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2175 (__v8df) __B,
2176 (__v8df) __W,
2177 (__mmask8) __U, __R);
2178}
2179
2180extern __inline __m512d
2181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2183 const int __R)
2184{
2185 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2186 (__v8df) __B,
2187 (__v8df)
2188 _mm512_setzero_pd (),
2189 (__mmask8) __U, __R);
2190}
2191
2192extern __inline __m512
2193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2195{
2196 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2197 (__v16sf) __B,
2198 (__v16sf)
0b192937 2199 _mm512_undefined_ps (),
756c5857
AI
2200 (__mmask16) -1, __R);
2201}
2202
2203extern __inline __m512
2204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2205_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2206 __m512 __B, const int __R)
2207{
2208 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2209 (__v16sf) __B,
2210 (__v16sf) __W,
2211 (__mmask16) __U, __R);
2212}
2213
2214extern __inline __m512
2215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2217{
2218 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2219 (__v16sf) __B,
2220 (__v16sf)
2221 _mm512_setzero_ps (),
2222 (__mmask16) __U, __R);
2223}
2224#else
/* Macro forms of the add_round intrinsics, selected when __OPTIMIZE__ is
   not defined.  Each expands to one builtin call; R is passed through as
   the last argument.  */
#define _mm512_add_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, R)

#define _mm512_maskz_add_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_add_round_ps(A, B, R) \
    (__m512)__builtin_ia32_addps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, R)

#define _mm512_maskz_add_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_addps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
2242
/* Macro forms of the sub_round intrinsics, selected when __OPTIMIZE__ is
   not defined.  Each expands to one builtin call; R is passed through as
   the last argument.  */
#define _mm512_sub_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, R)

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_sub_round_ps(A, B, R) \
    (__m512)__builtin_ia32_subps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, R)

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_subps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
2260#endif
2261
2262#ifdef __OPTIMIZE__
2263extern __inline __m512d
2264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2265_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2266{
2267 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2268 (__v8df) __B,
2269 (__v8df)
0b192937 2270 _mm512_undefined_pd (),
756c5857
AI
2271 (__mmask8) -1, __R);
2272}
2273
2274extern __inline __m512d
2275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2277 __m512d __B, const int __R)
2278{
2279 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2280 (__v8df) __B,
2281 (__v8df) __W,
2282 (__mmask8) __U, __R);
2283}
2284
2285extern __inline __m512d
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2288 const int __R)
2289{
2290 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2291 (__v8df) __B,
2292 (__v8df)
2293 _mm512_setzero_pd (),
2294 (__mmask8) __U, __R);
2295}
2296
2297extern __inline __m512
2298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2299_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2300{
2301 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2302 (__v16sf) __B,
2303 (__v16sf)
0b192937 2304 _mm512_undefined_ps (),
756c5857
AI
2305 (__mmask16) -1, __R);
2306}
2307
2308extern __inline __m512
2309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2310_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2311 __m512 __B, const int __R)
2312{
2313 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2314 (__v16sf) __B,
2315 (__v16sf) __W,
2316 (__mmask16) __U, __R);
2317}
2318
2319extern __inline __m512
2320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2322{
2323 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2324 (__v16sf) __B,
2325 (__v16sf)
2326 _mm512_setzero_ps (),
2327 (__mmask16) __U, __R);
2328}
2329
2330extern __inline __m512d
2331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2332_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2333{
2334 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2335 (__v8df) __V,
2336 (__v8df)
0b192937 2337 _mm512_undefined_pd (),
756c5857
AI
2338 (__mmask8) -1, __R);
2339}
2340
2341extern __inline __m512d
2342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2344 __m512d __V, const int __R)
2345{
2346 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2347 (__v8df) __V,
2348 (__v8df) __W,
2349 (__mmask8) __U, __R);
2350}
2351
2352extern __inline __m512d
2353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2355 const int __R)
2356{
2357 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2358 (__v8df) __V,
2359 (__v8df)
2360 _mm512_setzero_pd (),
2361 (__mmask8) __U, __R);
2362}
2363
2364extern __inline __m512
2365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2367{
2368 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2369 (__v16sf) __B,
2370 (__v16sf)
0b192937 2371 _mm512_undefined_ps (),
756c5857
AI
2372 (__mmask16) -1, __R);
2373}
2374
2375extern __inline __m512
2376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2377_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2378 __m512 __B, const int __R)
2379{
2380 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2381 (__v16sf) __B,
2382 (__v16sf) __W,
2383 (__mmask16) __U, __R);
2384}
2385
2386extern __inline __m512
2387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2388_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2389{
2390 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2391 (__v16sf) __B,
2392 (__v16sf)
2393 _mm512_setzero_ps (),
2394 (__mmask16) __U, __R);
2395}
2396
075691af
AI
2397extern __inline __m128d
2398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2400{
2401 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2402 (__v2df) __B,
2403 __R);
2404}
2405
2406extern __inline __m128
2407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2408_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2409{
2410 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2411 (__v4sf) __B,
2412 __R);
2413}
2414
2415extern __inline __m128d
2416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2417_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2418{
2419 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2420 (__v2df) __B,
2421 __R);
2422}
2423
2424extern __inline __m128
2425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2426_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2427{
2428 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2429 (__v4sf) __B,
2430 __R);
2431}
2432
756c5857
AI
2433#else
/* Macro forms of the mul_round intrinsics, selected when __OPTIMIZE__ is
   not defined.  Each expands to one builtin call; R is passed through as
   the last argument.  */
#define _mm512_mul_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, R)

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_mul_round_ps(A, B, R) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, R)

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
2451
/* Macro forms of the div_round intrinsics, selected when __OPTIMIZE__ is
   not defined.  Each expands to one builtin call; R is passed through as
   the last argument.  */
#define _mm512_div_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, R)

#define _mm512_maskz_div_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_div_round_ps(A, B, R) \
    (__m512)__builtin_ia32_divps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, R)

#define _mm512_maskz_div_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_divps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
075691af
AI
2469
/* Macro forms of the scalar mul/div round intrinsics, selected when
   __OPTIMIZE__ is not defined.  R is passed through as the last
   argument of the builtin.  */
#define _mm_mul_round_sd(A, B, R) \
    (__m128d)__builtin_ia32_mulsd_round(A, B, R)

#define _mm_mul_round_ss(A, B, R) \
    (__m128)__builtin_ia32_mulss_round(A, B, R)

#define _mm_div_round_sd(A, B, R) \
    (__m128d)__builtin_ia32_divsd_round(A, B, R)

#define _mm_div_round_ss(A, B, R) \
    (__m128)__builtin_ia32_divss_round(A, B, R)
756c5857
AI
2481#endif
2482
2483#ifdef __OPTIMIZE__
2484extern __inline __m512d
2485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2486_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2487{
2488 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2489 (__v8df) __B,
2490 (__v8df)
0b192937 2491 _mm512_undefined_pd (),
756c5857
AI
2492 (__mmask8) -1, __R);
2493}
2494
2495extern __inline __m512d
2496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2498 __m512d __B, const int __R)
2499{
2500 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2501 (__v8df) __B,
2502 (__v8df) __W,
2503 (__mmask8) __U, __R);
2504}
2505
2506extern __inline __m512d
2507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2509 const int __R)
2510{
2511 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2512 (__v8df) __B,
2513 (__v8df)
2514 _mm512_setzero_pd (),
2515 (__mmask8) __U, __R);
2516}
2517
2518extern __inline __m512
2519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2521{
2522 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2523 (__v16sf) __B,
2524 (__v16sf)
0b192937 2525 _mm512_undefined_ps (),
756c5857
AI
2526 (__mmask16) -1, __R);
2527}
2528
2529extern __inline __m512
2530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2532 __m512 __B, const int __R)
2533{
2534 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2535 (__v16sf) __B,
2536 (__v16sf) __W,
2537 (__mmask16) __U, __R);
2538}
2539
2540extern __inline __m512
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2543{
2544 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2545 (__v16sf) __B,
2546 (__v16sf)
2547 _mm512_setzero_ps (),
2548 (__mmask16) __U, __R);
2549}
2550
2551extern __inline __m512d
2552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2554{
2555 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2556 (__v8df) __B,
2557 (__v8df)
0b192937 2558 _mm512_undefined_pd (),
756c5857
AI
2559 (__mmask8) -1, __R);
2560}
2561
2562extern __inline __m512d
2563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2565 __m512d __B, const int __R)
2566{
2567 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2568 (__v8df) __B,
2569 (__v8df) __W,
2570 (__mmask8) __U, __R);
2571}
2572
2573extern __inline __m512d
2574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2576 const int __R)
2577{
2578 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2579 (__v8df) __B,
2580 (__v8df)
2581 _mm512_setzero_pd (),
2582 (__mmask8) __U, __R);
2583}
2584
2585extern __inline __m512
2586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2587_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2588{
2589 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2590 (__v16sf) __B,
2591 (__v16sf)
0b192937 2592 _mm512_undefined_ps (),
756c5857
AI
2593 (__mmask16) -1, __R);
2594}
2595
2596extern __inline __m512
2597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2598_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2599 __m512 __B, const int __R)
2600{
2601 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2602 (__v16sf) __B,
2603 (__v16sf) __W,
2604 (__mmask16) __U, __R);
2605}
2606
2607extern __inline __m512
2608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2609_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2610{
2611 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2612 (__v16sf) __B,
2613 (__v16sf)
2614 _mm512_setzero_ps (),
2615 (__mmask16) __U, __R);
2616}
2617#else
/* Macro forms of the double-precision max_round intrinsics, selected
   when __OPTIMIZE__ is not defined.  Each expands to one call of
   __builtin_ia32_maxpd512_mask with R as the last argument.  */
#define _mm512_max_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)
2626
/* Macro form of _mm512_max_round_ps, selected when __OPTIMIZE__ is not
   defined.  Expands to __builtin_ia32_maxps512_mask with a mask of -1 and
   R as the last argument.  The third operand comes from
   _mm512_undefined_ps (), the single-precision helper, so that this macro
   agrees with the inline-function definition of the same intrinsic; the
   double-precision helper _mm512_undefined_pd () was used here before,
   which only worked because of the reinterpreting (__v16sf) cast.  */
#define _mm512_max_round_ps(A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)
756c5857
AI
2629
/* Macro forms of the masked single-precision max_round intrinsics,
   selected when __OPTIMIZE__ is not defined.  Each expands to one call
   of __builtin_ia32_maxps512_mask with R as the last argument.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
2635
/* Macro forms of the min_round intrinsics, selected when __OPTIMIZE__ is
   not defined.  Each expands to one builtin call; R is passed through as
   the last argument.  */
#define _mm512_min_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_min_round_ps(A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
2653#endif
2654
2655#ifdef __OPTIMIZE__
2656extern __inline __m512d
2657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2658_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2659{
2660 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2661 (__v8df) __B,
2662 (__v8df)
0b192937 2663 _mm512_undefined_pd (),
756c5857
AI
2664 (__mmask8) -1, __R);
2665}
2666
2667extern __inline __m512d
2668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2670 __m512d __B, const int __R)
2671{
2672 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2673 (__v8df) __B,
2674 (__v8df) __W,
2675 (__mmask8) __U, __R);
2676}
2677
2678extern __inline __m512d
2679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2681 const int __R)
2682{
2683 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2684 (__v8df) __B,
2685 (__v8df)
2686 _mm512_setzero_pd (),
2687 (__mmask8) __U, __R);
2688}
2689
2690extern __inline __m512
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2693{
2694 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2695 (__v16sf) __B,
2696 (__v16sf)
0b192937 2697 _mm512_undefined_ps (),
756c5857
AI
2698 (__mmask16) -1, __R);
2699}
2700
2701extern __inline __m512
2702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2704 __m512 __B, const int __R)
2705{
2706 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2707 (__v16sf) __B,
2708 (__v16sf) __W,
2709 (__mmask16) __U, __R);
2710}
2711
2712extern __inline __m512
2713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2714_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2715 const int __R)
2716{
2717 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2718 (__v16sf) __B,
2719 (__v16sf)
2720 _mm512_setzero_ps (),
2721 (__mmask16) __U, __R);
2722}
2723
075691af
AI
2724extern __inline __m128d
2725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2727{
2728 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2729 (__v2df) __B,
2730 __R);
2731}
2732
2733extern __inline __m128
2734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2735_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2736{
2737 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2738 (__v4sf) __B,
2739 __R);
2740}
756c5857
AI
2741#else
/* Macro forms of the 512-bit scalef_round intrinsics, selected when
   __OPTIMIZE__ is not defined.  Each expands to one builtin call; R is
   passed through as the last argument.  */
#define _mm512_scalef_round_pd(A, B, R) \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, \
        (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, R)

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, \
        (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_scalef_round_ps(A, B, R) \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, \
        (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, R)

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, \
        (__v16sf)_mm512_setzero_ps(), U, R)
075691af
AI
2759
/* Macro forms of the scalar scalef_round intrinsics, selected when
   __OPTIMIZE__ is not defined.  R is passed through as the last argument
   of the builtin.  */
#define _mm_scalef_round_sd(A, B, R) \
    (__m128d)__builtin_ia32_scalefsd_round(A, B, R)

#define _mm_scalef_round_ss(A, B, R) \
    (__m128)__builtin_ia32_scalefss_round(A, B, R)
756c5857
AI
2765#endif
2766
2767#ifdef __OPTIMIZE__
2768extern __inline __m512d
2769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2770_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2771{
2772 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2773 (__v8df) __B,
2774 (__v8df) __C,
2775 (__mmask8) -1, __R);
2776}
2777
2778extern __inline __m512d
2779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2780_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2781 __m512d __C, const int __R)
2782{
2783 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2784 (__v8df) __B,
2785 (__v8df) __C,
2786 (__mmask8) __U, __R);
2787}
2788
2789extern __inline __m512d
2790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2791_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2792 __mmask8 __U, const int __R)
2793{
2794 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2795 (__v8df) __B,
2796 (__v8df) __C,
2797 (__mmask8) __U, __R);
2798}
2799
2800extern __inline __m512d
2801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2802_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2803 __m512d __C, const int __R)
2804{
2805 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2806 (__v8df) __B,
2807 (__v8df) __C,
2808 (__mmask8) __U, __R);
2809}
2810
2811extern __inline __m512
2812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2813_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2814{
2815 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2816 (__v16sf) __B,
2817 (__v16sf) __C,
2818 (__mmask16) -1, __R);
2819}
2820
2821extern __inline __m512
2822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2823_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2824 __m512 __C, const int __R)
2825{
2826 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2827 (__v16sf) __B,
2828 (__v16sf) __C,
2829 (__mmask16) __U, __R);
2830}
2831
2832extern __inline __m512
2833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2835 __mmask16 __U, const int __R)
2836{
2837 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2838 (__v16sf) __B,
2839 (__v16sf) __C,
2840 (__mmask16) __U, __R);
2841}
2842
2843extern __inline __m512
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2846 __m512 __C, const int __R)
2847{
2848 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2849 (__v16sf) __B,
2850 (__v16sf) __C,
2851 (__mmask16) __U, __R);
2852}
2853
2854extern __inline __m512d
2855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2857{
2858 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2859 (__v8df) __B,
2860 -(__v8df) __C,
2861 (__mmask8) -1, __R);
2862}
2863
2864extern __inline __m512d
2865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2866_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2867 __m512d __C, const int __R)
2868{
2869 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2870 (__v8df) __B,
2871 -(__v8df) __C,
2872 (__mmask8) __U, __R);
2873}
2874
2875extern __inline __m512d
2876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2877_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2878 __mmask8 __U, const int __R)
2879{
2880 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2881 (__v8df) __B,
2882 (__v8df) __C,
2883 (__mmask8) __U, __R);
2884}
2885
2886extern __inline __m512d
2887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2888_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2889 __m512d __C, const int __R)
2890{
2891 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2892 (__v8df) __B,
2893 -(__v8df) __C,
2894 (__mmask8) __U, __R);
2895}
2896
2897extern __inline __m512
2898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2899_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2900{
2901 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2902 (__v16sf) __B,
2903 -(__v16sf) __C,
2904 (__mmask16) -1, __R);
2905}
2906
2907extern __inline __m512
2908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2910 __m512 __C, const int __R)
2911{
2912 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2913 (__v16sf) __B,
2914 -(__v16sf) __C,
2915 (__mmask16) __U, __R);
2916}
2917
2918extern __inline __m512
2919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2921 __mmask16 __U, const int __R)
2922{
2923 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2924 (__v16sf) __B,
2925 (__v16sf) __C,
2926 (__mmask16) __U, __R);
2927}
2928
2929extern __inline __m512
2930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2931_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2932 __m512 __C, const int __R)
2933{
2934 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2935 (__v16sf) __B,
2936 -(__v16sf) __C,
2937 (__mmask16) __U, __R);
2938}
2939
2940extern __inline __m512d
2941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2942_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2943{
2944 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2945 (__v8df) __B,
2946 (__v8df) __C,
2947 (__mmask8) -1, __R);
2948}
2949
2950extern __inline __m512d
2951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2952_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2953 __m512d __C, const int __R)
2954{
2955 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2956 (__v8df) __B,
2957 (__v8df) __C,
2958 (__mmask8) __U, __R);
2959}
2960
2961extern __inline __m512d
2962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2963_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2964 __mmask8 __U, const int __R)
2965{
2966 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2967 (__v8df) __B,
2968 (__v8df) __C,
2969 (__mmask8) __U, __R);
2970}
2971
2972extern __inline __m512d
2973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2974_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2975 __m512d __C, const int __R)
2976{
2977 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2978 (__v8df) __B,
2979 (__v8df) __C,
2980 (__mmask8) __U, __R);
2981}
2982
2983extern __inline __m512
2984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2985_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2986{
2987 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2988 (__v16sf) __B,
2989 (__v16sf) __C,
2990 (__mmask16) -1, __R);
2991}
2992
2993extern __inline __m512
2994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2996 __m512 __C, const int __R)
2997{
2998 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2999 (__v16sf) __B,
3000 (__v16sf) __C,
3001 (__mmask16) __U, __R);
3002}
3003
3004extern __inline __m512
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3007 __mmask16 __U, const int __R)
3008{
3009 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3010 (__v16sf) __B,
3011 (__v16sf) __C,
3012 (__mmask16) __U, __R);
3013}
3014
3015extern __inline __m512
3016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3018 __m512 __C, const int __R)
3019{
3020 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __C,
3023 (__mmask16) __U, __R);
3024}
3025
3026extern __inline __m512d
3027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3029{
3030 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3031 (__v8df) __B,
3032 -(__v8df) __C,
3033 (__mmask8) -1, __R);
3034}
3035
3036extern __inline __m512d
3037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3038_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3039 __m512d __C, const int __R)
3040{
3041 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3042 (__v8df) __B,
3043 -(__v8df) __C,
3044 (__mmask8) __U, __R);
3045}
3046
3047extern __inline __m512d
3048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3050 __mmask8 __U, const int __R)
3051{
3052 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3053 (__v8df) __B,
3054 (__v8df) __C,
3055 (__mmask8) __U, __R);
3056}
3057
3058extern __inline __m512d
3059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3060_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3061 __m512d __C, const int __R)
3062{
3063 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3064 (__v8df) __B,
3065 -(__v8df) __C,
3066 (__mmask8) __U, __R);
3067}
3068
3069extern __inline __m512
3070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3072{
3073 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3074 (__v16sf) __B,
3075 -(__v16sf) __C,
3076 (__mmask16) -1, __R);
3077}
3078
3079extern __inline __m512
3080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3082 __m512 __C, const int __R)
3083{
3084 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3085 (__v16sf) __B,
3086 -(__v16sf) __C,
3087 (__mmask16) __U, __R);
3088}
3089
3090extern __inline __m512
3091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3093 __mmask16 __U, const int __R)
3094{
3095 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3096 (__v16sf) __B,
3097 (__v16sf) __C,
3098 (__mmask16) __U, __R);
3099}
3100
3101extern __inline __m512
3102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3103_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3104 __m512 __C, const int __R)
3105{
3106 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3107 (__v16sf) __B,
3108 -(__v16sf) __C,
3109 (__mmask16) __U, __R);
3110}
3111
3112extern __inline __m512d
3113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3114_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3115{
3116 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3117 (__v8df) __B,
3118 (__v8df) __C,
3119 (__mmask8) -1, __R);
3120}
3121
3122extern __inline __m512d
3123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3124_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3125 __m512d __C, const int __R)
3126{
3127 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3128 (__v8df) __B,
3129 (__v8df) __C,
3130 (__mmask8) __U, __R);
3131}
3132
3133extern __inline __m512d
3134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3136 __mmask8 __U, const int __R)
3137{
3138 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3139 (__v8df) __B,
3140 (__v8df) __C,
3141 (__mmask8) __U, __R);
3142}
3143
3144extern __inline __m512d
3145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3147 __m512d __C, const int __R)
3148{
3149 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3150 (__v8df) __B,
3151 (__v8df) __C,
3152 (__mmask8) __U, __R);
3153}
3154
3155extern __inline __m512
3156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3158{
3159 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3160 (__v16sf) __B,
3161 (__v16sf) __C,
3162 (__mmask16) -1, __R);
3163}
3164
3165extern __inline __m512
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3168 __m512 __C, const int __R)
3169{
3170 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3171 (__v16sf) __B,
3172 (__v16sf) __C,
3173 (__mmask16) __U, __R);
3174}
3175
3176extern __inline __m512
3177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3179 __mmask16 __U, const int __R)
3180{
3181 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3182 (__v16sf) __B,
3183 (__v16sf) __C,
3184 (__mmask16) __U, __R);
3185}
3186
3187extern __inline __m512
3188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3189_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3190 __m512 __C, const int __R)
3191{
3192 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3193 (__v16sf) __B,
3194 (__v16sf) __C,
3195 (__mmask16) __U, __R);
3196}
3197
3198extern __inline __m512d
3199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3200_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3201{
3202 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3203 (__v8df) __B,
3204 -(__v8df) __C,
3205 (__mmask8) -1, __R);
3206}
3207
3208extern __inline __m512d
3209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3210_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3211 __m512d __C, const int __R)
3212{
3213 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3214 (__v8df) __B,
3215 (__v8df) __C,
3216 (__mmask8) __U, __R);
3217}
3218
3219extern __inline __m512d
3220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3221_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3222 __mmask8 __U, const int __R)
3223{
3224 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3225 (__v8df) __B,
3226 (__v8df) __C,
3227 (__mmask8) __U, __R);
3228}
3229
3230extern __inline __m512d
3231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3232_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3233 __m512d __C, const int __R)
3234{
3235 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3236 (__v8df) __B,
3237 -(__v8df) __C,
3238 (__mmask8) __U, __R);
3239}
3240
3241extern __inline __m512
3242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3243_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3244{
3245 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3246 (__v16sf) __B,
3247 -(__v16sf) __C,
3248 (__mmask16) -1, __R);
3249}
3250
3251extern __inline __m512
3252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3253_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3254 __m512 __C, const int __R)
3255{
3256 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3257 (__v16sf) __B,
3258 (__v16sf) __C,
3259 (__mmask16) __U, __R);
3260}
3261
3262extern __inline __m512
3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3265 __mmask16 __U, const int __R)
3266{
3267 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3268 (__v16sf) __B,
3269 (__v16sf) __C,
3270 (__mmask16) __U, __R);
3271}
3272
3273extern __inline __m512
3274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3276 __m512 __C, const int __R)
3277{
3278 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3279 (__v16sf) __B,
3280 -(__v16sf) __C,
3281 (__mmask16) __U, __R);
3282}
3283#else
/* Macro forms of the fmadd_round_pd intrinsics.  Each expands to one
   __builtin_ia32_vfmaddpd512_* call; the unmasked form passes -1 as the
   mask.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R)

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R)
3295
/* Macro forms of the fmadd_round_ps intrinsics.  Each expands to one
   __builtin_ia32_vfmaddps512_* call; the unmasked form passes -1 as the
   mask.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R)

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R)

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R)
3307
/* Macro forms of the fmsub_round_pd intrinsics.  The plain, mask and
   maskz forms reuse the vfmaddpd512 builtins with C negated; the mask3
   form uses the dedicated vfmsubpd512_mask3 builtin with C as given.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R)
3319
/* Macro forms of the fmsub_round_ps intrinsics.  The plain, mask and
   maskz forms reuse the vfmaddps512 builtins with C negated; the mask3
   form uses the dedicated vfmsubps512_mask3 builtin with C as given.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R)
3331
/* Macro form: expands to __builtin_ia32_vfmaddsubpd512_mask with -1 as
   the mask.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R)
3334
/* Macro form of _mm512_mask_fmaddsub_round_pd.  It must expand to the
   fmaddsub builtin, exactly as the inline definition of this intrinsic
   does; expanding to __builtin_ia32_vfmaddpd512_mask would turn it into
   a plain fmadd.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R)
3337
/* Macro forms: expand to the vfmaddsubpd512 mask3 / maskz builtins with
   the operands in A, B, C, U, R order.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R)
3343
/* Macro forms of the fmaddsub_round_ps intrinsics.  Each expands to one
   __builtin_ia32_vfmaddsubps512_* call; the unmasked form passes -1 as
   the mask.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R)

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R)

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R)
3355
/* Macro forms of the fmsubadd_round_pd intrinsics.  The plain, mask and
   maskz forms reuse the vfmaddsubpd512 builtins with C negated; the
   mask3 form uses the dedicated vfmsubaddpd512_mask3 builtin.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R)
3367
/* Macro forms of the fmsubadd_round_ps intrinsics.  The plain, mask and
   maskz forms reuse the vfmaddsubps512 builtins with C negated; the
   mask3 form uses the dedicated vfmsubaddps512_mask3 builtin.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R)
3379
/* Macro form: expands to __builtin_ia32_vfmaddpd512_mask with A negated
   and -1 as the mask.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R)
3382
/* Macro form of _mm512_mask_fnmadd_round_pd.  A is handed to the
   dedicated vfnmaddpd512_mask builtin as given, matching the inline
   definition of this intrinsic; negating A here as well would make the
   macro and the inline function disagree.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R)
3385
/* Macro forms: expand to the vfmaddpd512 mask3 / maskz builtins with A
   negated.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R)

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R)
3391
/* Macro form: expands to __builtin_ia32_vfmaddps512_mask with A negated
   and -1 as the mask.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R)
3394
/* Macro form of _mm512_mask_fnmadd_round_ps.  A is handed to the
   dedicated vfnmaddps512_mask builtin as given, matching the inline
   definition of this intrinsic; negating A here as well would make the
   macro and the inline function disagree.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R)
3397
/* Macro forms: expand to the vfmaddps512 mask3 / maskz builtins with A
   negated.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R)

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R)
3403
/* Macro forms of the fnmsub_round_pd intrinsics.  The plain and maskz
   forms reuse the vfmaddpd512 builtins with A and C negated; the mask
   and mask3 forms use the dedicated vfnmsubpd512 builtins with the
   operands as given.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R)

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R)
3415
/* Macro forms of the fnmsub_round_ps intrinsics.  The plain and maskz
   forms reuse the vfmaddps512 builtins with A and C negated; the mask
   and mask3 forms use the dedicated vfnmsubps512 builtins with the
   operands as given.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R)

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R)
3427#endif
3428
3429extern __inline __m512i
3430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3431_mm512_abs_epi64 (__m512i __A)
3432{
3433 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3434 (__v8di)
4271e5cb 3435 _mm512_undefined_epi32 (),
756c5857
AI
3436 (__mmask8) -1);
3437}
3438
3439extern __inline __m512i
3440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3441_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3442{
3443 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3444 (__v8di) __W,
3445 (__mmask8) __U);
3446}
3447
3448extern __inline __m512i
3449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3450_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3451{
3452 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3453 (__v8di)
3454 _mm512_setzero_si512 (),
3455 (__mmask8) __U);
3456}
3457
3458extern __inline __m512i
3459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460_mm512_abs_epi32 (__m512i __A)
3461{
3462 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3463 (__v16si)
4271e5cb 3464 _mm512_undefined_epi32 (),
756c5857
AI
3465 (__mmask16) -1);
3466}
3467
3468extern __inline __m512i
3469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3470_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3471{
3472 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3473 (__v16si) __W,
3474 (__mmask16) __U);
3475}
3476
3477extern __inline __m512i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3480{
3481 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3482 (__v16si)
3483 _mm512_setzero_si512 (),
3484 (__mmask16) __U);
3485}
3486
3487extern __inline __m512
3488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3489_mm512_broadcastss_ps (__m128 __A)
3490{
0b192937
UD
3491 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3492 (__v16sf)
3493 _mm512_undefined_ps (),
756c5857
AI
3494 (__mmask16) -1);
3495}
3496
3497extern __inline __m512
3498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3500{
3501 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3502 (__v16sf) __O, __M);
3503}
3504
3505extern __inline __m512
3506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3507_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3508{
3509 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3510 (__v16sf)
3511 _mm512_setzero_ps (),
3512 __M);
3513}
3514
3515extern __inline __m512d
3516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517_mm512_broadcastsd_pd (__m128d __A)
3518{
0b192937
UD
3519 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3520 (__v8df)
3521 _mm512_undefined_pd (),
756c5857
AI
3522 (__mmask8) -1);
3523}
3524
3525extern __inline __m512d
3526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3527_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3528{
3529 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3530 (__v8df) __O, __M);
3531}
3532
3533extern __inline __m512d
3534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3536{
3537 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3538 (__v8df)
3539 _mm512_setzero_pd (),
3540 __M);
3541}
3542
3543extern __inline __m512i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm512_broadcastd_epi32 (__m128i __A)
3546{
0b192937
UD
3547 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3548 (__v16si)
4271e5cb 3549 _mm512_undefined_epi32 (),
756c5857
AI
3550 (__mmask16) -1);
3551}
3552
3553extern __inline __m512i
3554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3555_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3556{
3557 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3558 (__v16si) __O, __M);
3559}
3560
3561extern __inline __m512i
3562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3563_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3564{
3565 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3566 (__v16si)
3567 _mm512_setzero_si512 (),
3568 __M);
3569}
3570
3571extern __inline __m512i
3572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3573_mm512_set1_epi32 (int __A)
3574{
0b192937
UD
3575 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3576 (__v16si)
4271e5cb 3577 _mm512_undefined_epi32 (),
756c5857
AI
3578 (__mmask16)(-1));
3579}
3580
3581extern __inline __m512i
3582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3583_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3584{
3585 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3586 __M);
3587}
3588
3589extern __inline __m512i
3590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3591_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3592{
3593 return (__m512i)
3594 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3595 (__v16si) _mm512_setzero_si512 (),
3596 __M);
3597}
3598
3599extern __inline __m512i
3600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3601_mm512_broadcastq_epi64 (__m128i __A)
3602{
0b192937
UD
3603 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3604 (__v8di)
4271e5cb 3605 _mm512_undefined_epi32 (),
756c5857
AI
3606 (__mmask8) -1);
3607}
3608
3609extern __inline __m512i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3612{
3613 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3614 (__v8di) __O, __M);
3615}
3616
3617extern __inline __m512i
3618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3619_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3620{
3621 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3622 (__v8di)
3623 _mm512_setzero_si512 (),
3624 __M);
3625}
3626
3627extern __inline __m512i
3628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3629_mm512_set1_epi64 (long long __A)
3630{
0b192937
UD
3631 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3632 (__v8di)
4271e5cb 3633 _mm512_undefined_epi32 (),
756c5857 3634 (__mmask8)(-1));
756c5857
AI
3635}
3636
3637extern __inline __m512i
3638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3640{
756c5857
AI
3641 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3642 __M);
756c5857
AI
3643}
3644
3645extern __inline __m512i
3646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3647_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3648{
756c5857
AI
3649 return (__m512i)
3650 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3651 (__v8di) _mm512_setzero_si512 (),
3652 __M);
756c5857
AI
3653}
3654
3655extern __inline __m512
3656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657_mm512_broadcast_f32x4 (__m128 __A)
3658{
0b192937
UD
3659 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3660 (__v16sf)
3661 _mm512_undefined_ps (),
756c5857
AI
3662 (__mmask16) -1);
3663}
3664
3665extern __inline __m512
3666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3668{
3669 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3670 (__v16sf) __O,
3671 __M);
3672}
3673
3674extern __inline __m512
3675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3677{
3678 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3679 (__v16sf)
3680 _mm512_setzero_ps (),
3681 __M);
3682}
3683
3684extern __inline __m512i
3685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3686_mm512_broadcast_i32x4 (__m128i __A)
3687{
756c5857 3688 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 3689 (__v16si)
4271e5cb 3690 _mm512_undefined_epi32 (),
756c5857
AI
3691 (__mmask16) -1);
3692}
3693
3694extern __inline __m512i
3695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3696_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3697{
3698 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3699 (__v16si) __O,
3700 __M);
3701}
3702
3703extern __inline __m512i
3704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3705_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3706{
3707 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3708 (__v16si)
3709 _mm512_setzero_si512 (),
3710 __M);
3711}
3712
3713extern __inline __m512d
3714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3715_mm512_broadcast_f64x4 (__m256d __A)
3716{
756c5857 3717 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
3718 (__v8df)
3719 _mm512_undefined_pd (),
756c5857
AI
3720 (__mmask8) -1);
3721}
3722
3723extern __inline __m512d
3724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3725_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3726{
3727 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3728 (__v8df) __O,
3729 __M);
3730}
3731
3732extern __inline __m512d
3733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3734_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3735{
3736 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3737 (__v8df)
3738 _mm512_setzero_pd (),
3739 __M);
3740}
3741
3742extern __inline __m512i
3743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744_mm512_broadcast_i64x4 (__m256i __A)
3745{
756c5857 3746 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 3747 (__v8di)
4271e5cb 3748 _mm512_undefined_epi32 (),
756c5857
AI
3749 (__mmask8) -1);
3750}
3751
3752extern __inline __m512i
3753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3754_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3755{
3756 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3757 (__v8di) __O,
3758 __M);
3759}
3760
3761extern __inline __m512i
3762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3763_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3764{
3765 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3766 (__v8di)
3767 _mm512_setzero_si512 (),
3768 __M);
3769}
3770
3771typedef enum
3772{
3773 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3774 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3775 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3776 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3777 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3778 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3779 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3780 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3781 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3782 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3783 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3784 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3785 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3786 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3787 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3788 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3789 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3790 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3791 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3792 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3793 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3794 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3795 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3796 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3797 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3798 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3799 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3800 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3801 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3802 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3803 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3804 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3805 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3806 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3807 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3808 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3809 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3810 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3811 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3812 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3813 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3814 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3815 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3816 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3817 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3818 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3819 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3820 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3821 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3822 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3823 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3824 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3825 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3826 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3827 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3828 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3829 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3830 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3831 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3832 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3833 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3834 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3835 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3836 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3837 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3838 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3839 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3840 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3841 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3842 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3843 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3844 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3845 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3846 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3847 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3848 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3849 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3850 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3851 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3852 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3853 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3854 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3855 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3856 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3857 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3858 _MM_PERM_DDDD = 0xFF
3859} _MM_PERM_ENUM;
3860
3861#ifdef __OPTIMIZE__
3862extern __inline __m512i
3863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3864_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3865{
3866 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3867 __mask,
3868 (__v16si)
4271e5cb 3869 _mm512_undefined_epi32 (),
756c5857
AI
3870 (__mmask16) -1);
3871}
3872
3873extern __inline __m512i
3874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3875_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3876 _MM_PERM_ENUM __mask)
3877{
3878 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3879 __mask,
3880 (__v16si) __W,
3881 (__mmask16) __U);
3882}
3883
3884extern __inline __m512i
3885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3886_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3887{
3888 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3889 __mask,
3890 (__v16si)
3891 _mm512_setzero_si512 (),
3892 (__mmask16) __U);
3893}
3894
3895extern __inline __m512i
3896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3898{
3899 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3900 (__v8di) __B, __imm,
3901 (__v8di)
4271e5cb 3902 _mm512_undefined_epi32 (),
756c5857
AI
3903 (__mmask8) -1);
3904}
3905
3906extern __inline __m512i
3907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3909 __m512i __B, const int __imm)
3910{
3911 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3912 (__v8di) __B, __imm,
3913 (__v8di) __W,
3914 (__mmask8) __U);
3915}
3916
3917extern __inline __m512i
3918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3919_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3920 const int __imm)
3921{
3922 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3923 (__v8di) __B, __imm,
3924 (__v8di)
3925 _mm512_setzero_si512 (),
3926 (__mmask8) __U);
3927}
3928
3929extern __inline __m512i
3930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3932{
3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934 (__v16si) __B,
3935 __imm,
3936 (__v16si)
4271e5cb 3937 _mm512_undefined_epi32 (),
756c5857
AI
3938 (__mmask16) -1);
3939}
3940
3941extern __inline __m512i
3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3944 __m512i __B, const int __imm)
3945{
3946 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3947 (__v16si) __B,
3948 __imm,
3949 (__v16si) __W,
3950 (__mmask16) __U);
3951}
3952
3953extern __inline __m512i
3954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3955_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3956 const int __imm)
3957{
3958 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3959 (__v16si) __B,
3960 __imm,
3961 (__v16si)
3962 _mm512_setzero_si512 (),
3963 (__mmask16) __U);
3964}
3965
3966extern __inline __m512d
3967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3969{
3970 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3971 (__v8df) __B, __imm,
3972 (__v8df)
0b192937 3973 _mm512_undefined_pd (),
756c5857
AI
3974 (__mmask8) -1);
3975}
3976
3977extern __inline __m512d
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3980 __m512d __B, const int __imm)
3981{
3982 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3983 (__v8df) __B, __imm,
3984 (__v8df) __W,
3985 (__mmask8) __U);
3986}
3987
3988extern __inline __m512d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3991 const int __imm)
3992{
3993 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3994 (__v8df) __B, __imm,
3995 (__v8df)
3996 _mm512_setzero_pd (),
3997 (__mmask8) __U);
3998}
3999
4000extern __inline __m512
4001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4003{
4004 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4005 (__v16sf) __B, __imm,
4006 (__v16sf)
0b192937 4007 _mm512_undefined_ps (),
756c5857
AI
4008 (__mmask16) -1);
4009}
4010
4011extern __inline __m512
4012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4013_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4014 __m512 __B, const int __imm)
4015{
4016 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4017 (__v16sf) __B, __imm,
4018 (__v16sf) __W,
4019 (__mmask16) __U);
4020}
4021
4022extern __inline __m512
4023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4024_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4025 const int __imm)
4026{
4027 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4028 (__v16sf) __B, __imm,
4029 (__v16sf)
4030 _mm512_setzero_ps (),
4031 (__mmask16) __U);
4032}
4033
4034#else
/* Macro form of _mm512_shuffle_epi32, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_epi32(A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))
4039
/* Macro form of _mm512_mask_shuffle_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_shuffle_epi32(W, U, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
4044
/* Macro form of _mm512_maskz_shuffle_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_epi32(U, A, IMM) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(IMM), \
     (__v16si)(__m512i) _mm512_setzero_si512 (), \
     (__mmask16)(U)))
4049
/* Macro form of _mm512_shuffle_i64x2, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i64x2(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))
4055
/* Macro form of _mm512_mask_shuffle_i64x2, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
4061
/* Macro form of _mm512_maskz_shuffle_i64x2, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_i64x2(U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di)(__m512i)(A), \
     (__v8di)(__m512i)(B), \
     (int)(IMM), \
     (__v8di)(__m512i) _mm512_setzero_si512 (), \
     (__mmask8)(U)))
4067
/* Macro form of _mm512_shuffle_i32x4, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_i32x4(A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))
4073
/* Macro form of _mm512_mask_shuffle_i32x4, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
4079
/* Macro form of _mm512_maskz_shuffle_i32x4, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_i32x4(U, A, B, IMM) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si)(__m512i)(A), \
     (__v16si)(__m512i)(B), \
     (int)(IMM), \
     (__v16si)(__m512i) _mm512_setzero_si512 (), \
     (__mmask16)(U)))
4085
/* Macro form of _mm512_shuffle_f64x2, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f64x2(A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
4091
/* Macro form of _mm512_mask_shuffle_f64x2, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
4097
/* Macro form of _mm512_maskz_shuffle_f64x2, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_f64x2(U, A, B, IMM) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(IMM), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8)(U)))
4103
/* Macro form of _mm512_shuffle_f32x4, selected when __OPTIMIZE__ is not
   defined.  */
#define _mm512_shuffle_f32x4(A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))
4109
/* Macro form of _mm512_mask_shuffle_f32x4, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
4115
/* Macro form of _mm512_maskz_shuffle_f32x4, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_maskz_shuffle_f32x4(U, A, B, IMM) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(IMM), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16)(U)))
4121#endif
4122
4123extern __inline __m512i
4124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4126{
4127 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4128 (__v16si) __B,
4129 (__v16si)
4271e5cb 4130 _mm512_undefined_epi32 (),
756c5857
AI
4131 (__mmask16) -1);
4132}
4133
4134extern __inline __m512i
4135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4136_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4137{
4138 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4139 (__v16si) __B,
4140 (__v16si) __W,
4141 (__mmask16) __U);
4142}
4143
4144extern __inline __m512i
4145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4146_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4147{
4148 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4149 (__v16si) __B,
4150 (__v16si)
4151 _mm512_setzero_si512 (),
4152 (__mmask16) __U);
4153}
4154
4155extern __inline __m512i
4156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4157_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4158{
4159 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4160 (__v16si) __B,
4161 (__v16si)
4271e5cb 4162 _mm512_undefined_epi32 (),
756c5857
AI
4163 (__mmask16) -1);
4164}
4165
4166extern __inline __m512i
4167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4168_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4169{
4170 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4171 (__v16si) __B,
4172 (__v16si) __W,
4173 (__mmask16) __U);
4174}
4175
4176extern __inline __m512i
4177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4179{
4180 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4181 (__v16si) __B,
4182 (__v16si)
4183 _mm512_setzero_si512 (),
4184 (__mmask16) __U);
4185}
4186
4187extern __inline __m512i
4188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4190{
4191 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4192 (__v8di) __B,
4193 (__v8di)
4271e5cb 4194 _mm512_undefined_epi32 (),
756c5857
AI
4195 (__mmask8) -1);
4196}
4197
4198extern __inline __m512i
4199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4201{
4202 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4203 (__v8di) __B,
4204 (__v8di) __W,
4205 (__mmask8) __U);
4206}
4207
4208extern __inline __m512i
4209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4211{
4212 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4213 (__v8di) __B,
4214 (__v8di)
4215 _mm512_setzero_si512 (),
4216 (__mmask8) __U);
4217}
4218
4219extern __inline __m512i
4220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4221_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4222{
4223 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4224 (__v8di) __B,
4225 (__v8di)
4271e5cb 4226 _mm512_undefined_epi32 (),
756c5857
AI
4227 (__mmask8) -1);
4228}
4229
4230extern __inline __m512i
4231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4232_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4233{
4234 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4235 (__v8di) __B,
4236 (__v8di) __W,
4237 (__mmask8) __U);
4238}
4239
4240extern __inline __m512i
4241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4243{
4244 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4245 (__v8di) __B,
4246 (__v8di)
4247 _mm512_setzero_si512 (),
4248 (__mmask8) __U);
4249}
4250
4251#ifdef __OPTIMIZE__
4252extern __inline __m256i
4253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4255{
4256 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4257 (__v8si)
0b192937 4258 _mm256_undefined_si256 (),
756c5857
AI
4259 (__mmask8) -1, __R);
4260}
4261
4262extern __inline __m256i
4263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4264_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4265 const int __R)
4266{
4267 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4268 (__v8si) __W,
4269 (__mmask8) __U, __R);
4270}
4271
4272extern __inline __m256i
4273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4274_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4275{
4276 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4277 (__v8si)
4278 _mm256_setzero_si256 (),
4279 (__mmask8) __U, __R);
4280}
4281
4282extern __inline __m256i
4283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4284_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4285{
4286 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4287 (__v8si)
0b192937 4288 _mm256_undefined_si256 (),
756c5857
AI
4289 (__mmask8) -1, __R);
4290}
4291
4292extern __inline __m256i
4293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4295 const int __R)
4296{
4297 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4298 (__v8si) __W,
4299 (__mmask8) __U, __R);
4300}
4301
4302extern __inline __m256i
4303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4305{
4306 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4307 (__v8si)
4308 _mm256_setzero_si256 (),
4309 (__mmask8) __U, __R);
4310}
4311#else
/* Macro form of _mm512_cvtt_roundpd_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (R)))
756c5857
AI
4314
/* Macro form of _mm512_mask_cvtt_roundpd_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si)(W), (U), (R)))
4317
/* Macro form of _mm512_maskz_cvtt_roundpd_epi32, selected when
   __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (R)))
4320
/* Macro form of _mm512_cvtt_roundpd_epu32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (R)))
756c5857
AI
4323
/* Macro form of _mm512_mask_cvtt_roundpd_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si)(W), (U), (R)))
4326
/* Macro form of _mm512_maskz_cvtt_roundpd_epu32, selected when
   __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (R)))
4329#endif
4330
4331#ifdef __OPTIMIZE__
4332extern __inline __m256i
4333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4334_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4335{
4336 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4337 (__v8si)
0b192937 4338 _mm256_undefined_si256 (),
756c5857
AI
4339 (__mmask8) -1, __R);
4340}
4341
4342extern __inline __m256i
4343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4345 const int __R)
4346{
4347 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4348 (__v8si) __W,
4349 (__mmask8) __U, __R);
4350}
4351
4352extern __inline __m256i
4353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4354_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4355{
4356 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4357 (__v8si)
4358 _mm256_setzero_si256 (),
4359 (__mmask8) __U, __R);
4360}
4361
4362extern __inline __m256i
4363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4365{
4366 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4367 (__v8si)
0b192937 4368 _mm256_undefined_si256 (),
756c5857
AI
4369 (__mmask8) -1, __R);
4370}
4371
4372extern __inline __m256i
4373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4375 const int __R)
4376{
4377 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4378 (__v8si) __W,
4379 (__mmask8) __U, __R);
4380}
4381
4382extern __inline __m256i
4383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4385{
4386 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4387 (__v8si)
4388 _mm256_setzero_si256 (),
4389 (__mmask8) __U, __R);
4390}
4391#else
/* Macro form of _mm512_cvt_roundpd_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (R)))
756c5857
AI
4394
/* Macro form of _mm512_mask_cvt_roundpd_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si)(W), (U), (R)))
4397
/* Macro form of _mm512_maskz_cvt_roundpd_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (R)))
4400
/* Macro form of _mm512_cvt_roundpd_epu32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si) _mm256_undefined_si256 (), -1, (R)))
756c5857
AI
4403
/* Macro form of _mm512_mask_cvt_roundpd_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si)(W), (U), (R)))
4406
/* Macro form of _mm512_maskz_cvt_roundpd_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     (A), (__v8si) _mm256_setzero_si256 (), (U), (R)))
4409#endif
4410
4411#ifdef __OPTIMIZE__
4412extern __inline __m512i
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4415{
4416 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4417 (__v16si)
4271e5cb 4418 _mm512_undefined_epi32 (),
756c5857
AI
4419 (__mmask16) -1, __R);
4420}
4421
4422extern __inline __m512i
4423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4425 const int __R)
4426{
4427 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4428 (__v16si) __W,
4429 (__mmask16) __U, __R);
4430}
4431
4432extern __inline __m512i
4433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4434_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4435{
4436 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4437 (__v16si)
4438 _mm512_setzero_si512 (),
4439 (__mmask16) __U, __R);
4440}
4441
4442extern __inline __m512i
4443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4444_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4445{
4446 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4447 (__v16si)
4271e5cb 4448 _mm512_undefined_epi32 (),
756c5857
AI
4449 (__mmask16) -1, __R);
4450}
4451
4452extern __inline __m512i
4453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4455 const int __R)
4456{
4457 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4458 (__v16si) __W,
4459 (__mmask16) __U, __R);
4460}
4461
4462extern __inline __m512i
4463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4464_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4465{
4466 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4467 (__v16si)
4468 _mm512_setzero_si512 (),
4469 (__mmask16) __U, __R);
4470}
4471#else
/* Macro form of _mm512_cvtt_roundps_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si) _mm512_undefined_epi32 (), -1, (R)))
756c5857
AI
4474
/* Macro form of _mm512_mask_cvtt_roundps_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si)(W), (U), (R)))
4477
/* Macro form of _mm512_maskz_cvtt_roundps_epi32, selected when
   __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     (A), (__v16si) _mm512_setzero_si512 (), (U), (R)))
4480
/* Macro form of _mm512_cvtt_roundps_epu32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     (A), (__v16si) _mm512_undefined_epi32 (), -1, (R)))
756c5857
AI
4483
/* Macro form of _mm512_mask_cvtt_roundps_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     (A), (__v16si)(W), (U), (R)))
4486
/* Macro form of _mm512_maskz_cvtt_roundps_epu32, selected when
   __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     (A), (__v16si) _mm512_setzero_si512 (), (U), (R)))
4489#endif
4490
4491#ifdef __OPTIMIZE__
4492extern __inline __m512i
4493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4494_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4495{
4496 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4497 (__v16si)
4271e5cb 4498 _mm512_undefined_epi32 (),
756c5857
AI
4499 (__mmask16) -1, __R);
4500}
4501
4502extern __inline __m512i
4503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4504_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4505 const int __R)
4506{
4507 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4508 (__v16si) __W,
4509 (__mmask16) __U, __R);
4510}
4511
4512extern __inline __m512i
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4515{
4516 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4517 (__v16si)
4518 _mm512_setzero_si512 (),
4519 (__mmask16) __U, __R);
4520}
4521
4522extern __inline __m512i
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4525{
4526 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4527 (__v16si)
4271e5cb 4528 _mm512_undefined_epi32 (),
756c5857
AI
4529 (__mmask16) -1, __R);
4530}
4531
4532extern __inline __m512i
4533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4534_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4535 const int __R)
4536{
4537 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4538 (__v16si) __W,
4539 (__mmask16) __U, __R);
4540}
4541
4542extern __inline __m512i
4543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4544_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4545{
4546 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4547 (__v16si)
4548 _mm512_setzero_si512 (),
4549 (__mmask16) __U, __R);
4550}
4551#else
/* Macro form of _mm512_cvt_roundps_epi32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     (A), (__v16si) _mm512_undefined_epi32 (), -1, (R)))
756c5857
AI
4554
/* Macro form of _mm512_mask_cvt_roundps_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     (A), (__v16si)(W), (U), (R)))
4557
/* Macro form of _mm512_maskz_cvt_roundps_epi32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     (A), (__v16si) _mm512_setzero_si512 (), (U), (R)))
4560
/* Macro form of _mm512_cvt_roundps_epu32, selected when __OPTIMIZE__ is
   not defined.  */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     (A), (__v16si) _mm512_undefined_epi32 (), -1, (R)))
756c5857
AI
4563
/* Macro form of _mm512_mask_cvt_roundps_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     (A), (__v16si)(W), (U), (R)))
4566
/* Macro form of _mm512_maskz_cvt_roundps_epu32, selected when __OPTIMIZE__
   is not defined.  */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     (A), (__v16si) _mm512_setzero_si512 (), (U), (R)))
4569#endif
4570
4571extern __inline __m128d
4572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573_mm_cvtu32_sd (__m128d __A, unsigned __B)
4574{
4575 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4576}
4577
4578#ifdef __x86_64__
4579#ifdef __OPTIMIZE__
4580extern __inline __m128d
4581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4582_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4583{
4584 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4585}
4586
4587extern __inline __m128d
4588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4589_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4590{
4591 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4592}
4593
4594extern __inline __m128d
4595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4596_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4597{
4598 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4599}
4600#else
/* Macro form of _mm_cvt_roundu64_sd, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128d cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((A), (B), (C)))
4603
/* Macro form of _mm_cvt_roundi64_sd, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128d cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
4606
/* Macro form of _mm_cvt_roundsi64_sd, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128d cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
4609#endif
4610
4611#endif
4612
4613#ifdef __OPTIMIZE__
4614extern __inline __m128
4615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4616_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4617{
4618 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4619}
4620
4621extern __inline __m128
4622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4624{
4625 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4626}
4627
4628extern __inline __m128
4629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4631{
4632 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4633}
4634#else
/* Macro form of _mm_cvt_roundu32_ss, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128 cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((A), (B), (C)))
4637
/* Macro form of _mm_cvt_roundi32_ss, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128 cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
4640
/* Macro form of _mm_cvt_roundsi32_ss, selected when __OPTIMIZE__ is not
   defined.  The whole expansion is parenthesized so that the __m128 cast
   applies to the complete builtin call even when the macro use is followed
   by a postfix operator.  */
#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
4643#endif
4644
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* 64-bit integer sources are only offered on x86-64.  Convert the
   unsigned 64-bit integer __B to single precision with rounding control
   __R; __A (viewed as __v4sf) is the vector operand of the builtin.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

/* Signed counterpart: convert the 64-bit integer __B via the cvtsi2ss64
   builtin, using rounding control __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

/* Alternate spelling of _mm_cvt_roundsi64_ss; the body is identical.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  They apply the
   same (__v4sf) view to A as the inline forms and are fully
   parenthesized.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf)(__m128) (A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf)(__m128) (A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf)(__m128) (A), (B), (C)))
#endif

#endif
4679
4680extern __inline __m128i
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm512_cvtepi32_epi8 (__m512i __A)
4683{
0b192937
UD
4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685 (__v16qi)
4686 _mm_undefined_si128 (),
756c5857
AI
4687 (__mmask16) -1);
4688}
4689
d256b866
IT
4690extern __inline void
4691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4693{
4694 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4695}
4696
756c5857
AI
4697extern __inline __m128i
4698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4699_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4700{
4701 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4702 (__v16qi) __O, __M);
4703}
4704
4705extern __inline __m128i
4706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4707_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4708{
4709 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4710 (__v16qi)
4711 _mm_setzero_si128 (),
4712 __M);
4713}
4714
4715extern __inline __m128i
4716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717_mm512_cvtsepi32_epi8 (__m512i __A)
4718{
0b192937
UD
4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_undefined_si128 (),
756c5857
AI
4722 (__mmask16) -1);
4723}
4724
d256b866
IT
4725extern __inline void
4726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4728{
4729 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4730}
4731
756c5857
AI
4732extern __inline __m128i
4733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4734_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4735{
4736 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4737 (__v16qi) __O, __M);
4738}
4739
4740extern __inline __m128i
4741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4743{
4744 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4745 (__v16qi)
4746 _mm_setzero_si128 (),
4747 __M);
4748}
4749
4750extern __inline __m128i
4751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752_mm512_cvtusepi32_epi8 (__m512i __A)
4753{
0b192937
UD
4754 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4755 (__v16qi)
4756 _mm_undefined_si128 (),
756c5857
AI
4757 (__mmask16) -1);
4758}
4759
d256b866
IT
4760extern __inline void
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4763{
4764 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4765}
4766
756c5857
AI
4767extern __inline __m128i
4768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4769_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4770{
4771 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4772 (__v16qi) __O,
4773 __M);
4774}
4775
4776extern __inline __m128i
4777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4779{
4780 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4781 (__v16qi)
4782 _mm_setzero_si128 (),
4783 __M);
4784}
4785
4786extern __inline __m256i
4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788_mm512_cvtepi32_epi16 (__m512i __A)
4789{
0b192937
UD
4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791 (__v16hi)
4792 _mm256_undefined_si256 (),
756c5857
AI
4793 (__mmask16) -1);
4794}
4795
d256b866
IT
4796extern __inline void
4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4799{
4800 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4801}
4802
756c5857
AI
4803extern __inline __m256i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4806{
4807 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4808 (__v16hi) __O, __M);
4809}
4810
4811extern __inline __m256i
4812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4813_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4814{
4815 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4816 (__v16hi)
4817 _mm256_setzero_si256 (),
4818 __M);
4819}
4820
4821extern __inline __m256i
4822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823_mm512_cvtsepi32_epi16 (__m512i __A)
4824{
0b192937
UD
4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_undefined_si256 (),
756c5857
AI
4828 (__mmask16) -1);
4829}
4830
d256b866
IT
4831extern __inline void
4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4834{
4835 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4836}
4837
756c5857
AI
4838extern __inline __m256i
4839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4840_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4841{
4842 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4843 (__v16hi) __O, __M);
4844}
4845
4846extern __inline __m256i
4847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4849{
4850 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4851 (__v16hi)
4852 _mm256_setzero_si256 (),
4853 __M);
4854}
4855
4856extern __inline __m256i
4857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858_mm512_cvtusepi32_epi16 (__m512i __A)
4859{
0b192937
UD
4860 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4861 (__v16hi)
4862 _mm256_undefined_si256 (),
756c5857
AI
4863 (__mmask16) -1);
4864}
4865
d256b866
IT
4866extern __inline void
4867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4869{
4870 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4871}
4872
756c5857
AI
4873extern __inline __m256i
4874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4875_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4876{
4877 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4878 (__v16hi) __O,
4879 __M);
4880}
4881
4882extern __inline __m256i
4883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4884_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4885{
4886 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4887 (__v16hi)
4888 _mm256_setzero_si256 (),
4889 __M);
4890}
4891
4892extern __inline __m256i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm512_cvtepi64_epi32 (__m512i __A)
4895{
0b192937
UD
4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897 (__v8si)
4898 _mm256_undefined_si256 (),
756c5857
AI
4899 (__mmask8) -1);
4900}
4901
d256b866
IT
4902extern __inline void
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4905{
4906 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4907}
4908
756c5857
AI
4909extern __inline __m256i
4910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4911_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4912{
4913 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4914 (__v8si) __O, __M);
4915}
4916
4917extern __inline __m256i
4918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4919_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4920{
4921 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4922 (__v8si)
4923 _mm256_setzero_si256 (),
4924 __M);
4925}
4926
4927extern __inline __m256i
4928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4929_mm512_cvtsepi64_epi32 (__m512i __A)
4930{
0b192937
UD
4931 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4932 (__v8si)
4933 _mm256_undefined_si256 (),
756c5857
AI
4934 (__mmask8) -1);
4935}
4936
d256b866
IT
4937extern __inline void
4938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4940{
4941 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4942}
4943
756c5857
AI
4944extern __inline __m256i
4945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4946_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4947{
4948 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4949 (__v8si) __O, __M);
4950}
4951
4952extern __inline __m256i
4953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4955{
4956 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4957 (__v8si)
4958 _mm256_setzero_si256 (),
4959 __M);
4960}
4961
4962extern __inline __m256i
4963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964_mm512_cvtusepi64_epi32 (__m512i __A)
4965{
0b192937
UD
4966 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4967 (__v8si)
4968 _mm256_undefined_si256 (),
756c5857
AI
4969 (__mmask8) -1);
4970}
4971
6fb82517 4972extern __inline void
d256b866
IT
4973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4974_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4975{
4976 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4977}
4978
756c5857
AI
4979extern __inline __m256i
4980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4981_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4982{
4983 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4984 (__v8si) __O, __M);
4985}
4986
4987extern __inline __m256i
4988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4990{
4991 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4992 (__v8si)
4993 _mm256_setzero_si256 (),
4994 __M);
4995}
4996
4997extern __inline __m128i
4998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999_mm512_cvtepi64_epi16 (__m512i __A)
5000{
0b192937
UD
5001 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5002 (__v8hi)
5003 _mm_undefined_si128 (),
756c5857
AI
5004 (__mmask8) -1);
5005}
5006
d256b866
IT
5007extern __inline void
5008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5010{
5011 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5012}
5013
756c5857
AI
5014extern __inline __m128i
5015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5016_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5017{
5018 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5019 (__v8hi) __O, __M);
5020}
5021
5022extern __inline __m128i
5023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5024_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5025{
5026 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5027 (__v8hi)
5028 _mm_setzero_si128 (),
5029 __M);
5030}
5031
5032extern __inline __m128i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034_mm512_cvtsepi64_epi16 (__m512i __A)
5035{
0b192937
UD
5036 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5037 (__v8hi)
5038 _mm_undefined_si128 (),
756c5857
AI
5039 (__mmask8) -1);
5040}
5041
d256b866
IT
5042extern __inline void
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5045{
5046 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5047}
5048
756c5857
AI
5049extern __inline __m128i
5050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5051_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5052{
5053 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5054 (__v8hi) __O, __M);
5055}
5056
5057extern __inline __m128i
5058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5060{
5061 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5062 (__v8hi)
5063 _mm_setzero_si128 (),
5064 __M);
5065}
5066
5067extern __inline __m128i
5068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069_mm512_cvtusepi64_epi16 (__m512i __A)
5070{
0b192937
UD
5071 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5072 (__v8hi)
5073 _mm_undefined_si128 (),
756c5857
AI
5074 (__mmask8) -1);
5075}
5076
d256b866
IT
5077extern __inline void
5078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5079_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5080{
5081 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5082}
5083
756c5857
AI
5084extern __inline __m128i
5085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5087{
5088 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5089 (__v8hi) __O, __M);
5090}
5091
5092extern __inline __m128i
5093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5095{
5096 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5097 (__v8hi)
5098 _mm_setzero_si128 (),
5099 __M);
5100}
5101
5102extern __inline __m128i
5103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5104_mm512_cvtepi64_epi8 (__m512i __A)
5105{
0b192937
UD
5106 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5107 (__v16qi)
5108 _mm_undefined_si128 (),
756c5857
AI
5109 (__mmask8) -1);
5110}
5111
d256b866
IT
5112extern __inline void
5113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5115{
5116 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5117}
5118
756c5857
AI
5119extern __inline __m128i
5120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5122{
5123 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5124 (__v16qi) __O, __M);
5125}
5126
5127extern __inline __m128i
5128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5129_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5130{
5131 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5132 (__v16qi)
5133 _mm_setzero_si128 (),
5134 __M);
5135}
5136
5137extern __inline __m128i
5138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139_mm512_cvtsepi64_epi8 (__m512i __A)
5140{
0b192937
UD
5141 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5142 (__v16qi)
5143 _mm_undefined_si128 (),
756c5857
AI
5144 (__mmask8) -1);
5145}
5146
d256b866
IT
5147extern __inline void
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5150{
5151 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5152}
5153
756c5857
AI
5154extern __inline __m128i
5155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5157{
5158 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5159 (__v16qi) __O, __M);
5160}
5161
5162extern __inline __m128i
5163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5164_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5165{
5166 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5167 (__v16qi)
5168 _mm_setzero_si128 (),
5169 __M);
5170}
5171
5172extern __inline __m128i
5173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5174_mm512_cvtusepi64_epi8 (__m512i __A)
5175{
0b192937
UD
5176 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5177 (__v16qi)
5178 _mm_undefined_si128 (),
756c5857
AI
5179 (__mmask8) -1);
5180}
5181
d256b866
IT
5182extern __inline void
5183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5185{
5186 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5187}
5188
756c5857
AI
5189extern __inline __m128i
5190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5191_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5192{
5193 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5194 (__v16qi) __O,
5195 __M);
5196}
5197
5198extern __inline __m128i
5199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5200_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5201{
5202 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5203 (__v16qi)
5204 _mm_setzero_si128 (),
5205 __M);
5206}
5207
5208extern __inline __m512d
5209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5210_mm512_cvtepi32_pd (__m256i __A)
5211{
5212 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5213 (__v8df)
0b192937 5214 _mm512_undefined_pd (),
756c5857
AI
5215 (__mmask8) -1);
5216}
5217
5218extern __inline __m512d
5219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5220_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5221{
5222 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5223 (__v8df) __W,
5224 (__mmask8) __U);
5225}
5226
5227extern __inline __m512d
5228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5229_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5230{
5231 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5232 (__v8df)
5233 _mm512_setzero_pd (),
5234 (__mmask8) __U);
5235}
5236
5237extern __inline __m512d
5238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239_mm512_cvtepu32_pd (__m256i __A)
5240{
5241 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5242 (__v8df)
0b192937 5243 _mm512_undefined_pd (),
756c5857
AI
5244 (__mmask8) -1);
5245}
5246
5247extern __inline __m512d
5248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5249_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5250{
5251 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5252 (__v8df) __W,
5253 (__mmask8) __U);
5254}
5255
5256extern __inline __m512d
5257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5258_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5259{
5260 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5261 (__v8df)
5262 _mm512_setzero_pd (),
5263 (__mmask8) __U);
5264}
5265
#ifdef __OPTIMIZE__
/* Convert the sixteen signed 32-bit integers of __A to single precision
   with rounding control __R (cvtdq2ps512 builtin).  All lanes are
   selected, so the pass-through operand is left undefined.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf)
                                                   _mm512_undefined_ps (),
                                                   (__mmask16) -1, __R);
}

/* Merge-masked form: __W is the pass-through operand used for lanes
   whose bit in __U is clear.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
                               const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U, __R);
}

/* Zero-masked form: a zero vector is the pass-through operand.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (),
                                                   (__mmask16) __U, __R);
}

/* Unsigned counterpart of _mm512_cvt_roundepi32_ps: the elements of __A
   are converted through the cvtudq2ps512 builtin.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1, __R);
}

/* Merge-masked form: __W is the pass-through operand used for lanes
   whose bit in __U is clear.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
                               const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U, __R);
}

/* Zero-masked form: a zero vector is the pass-through operand.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U, __R);
}

#else
/* Macro forms used when __OPTIMIZE__ is not defined.  They mirror the
   inline forms above: same builtin, same operand casts, and the whole
   expansion is parenthesized.  B is the rounding-control argument.  */
#define _mm512_cvt_roundepi32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
      (__v16sf)(__m512) (W), (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),              \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))

#define _mm512_cvt_roundepu32_ps(A, B)                                  \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)                       \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
      (__v16sf)(__m512) (W), (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)                         \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),             \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))
#endif
5346
5347#ifdef __OPTIMIZE__
5348extern __inline __m256d
5349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5350_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5351{
5352 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5353 __imm,
5354 (__v4df)
0b192937 5355 _mm256_undefined_pd (),
756c5857
AI
5356 (__mmask8) -1);
5357}
5358
5359extern __inline __m256d
5360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5362 const int __imm)
5363{
5364 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5365 __imm,
5366 (__v4df) __W,
5367 (__mmask8) __U);
5368}
5369
5370extern __inline __m256d
5371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5373{
5374 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5375 __imm,
5376 (__v4df)
5377 _mm256_setzero_pd (),
5378 (__mmask8) __U);
5379}
5380
5381extern __inline __m128
5382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5384{
5385 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5386 __imm,
5387 (__v4sf)
0b192937 5388 _mm_undefined_ps (),
756c5857
AI
5389 (__mmask8) -1);
5390}
5391
5392extern __inline __m128
5393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5395 const int __imm)
5396{
5397 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5398 __imm,
5399 (__v4sf) __W,
5400 (__mmask8) __U);
5401}
5402
5403extern __inline __m128
5404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5405_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5406{
5407 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5408 __imm,
5409 (__v4sf)
5410 _mm_setzero_ps (),
5411 (__mmask8) __U);
5412}
5413
5414extern __inline __m256i
5415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5417{
5418 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5419 __imm,
5420 (__v4di)
0b192937 5421 _mm256_undefined_si256 (),
756c5857
AI
5422 (__mmask8) -1);
5423}
5424
5425extern __inline __m256i
5426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5427_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5428 const int __imm)
5429{
5430 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5431 __imm,
5432 (__v4di) __W,
5433 (__mmask8) __U);
5434}
5435
5436extern __inline __m256i
5437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5439{
5440 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5441 __imm,
5442 (__v4di)
5443 _mm256_setzero_si256 (),
5444 (__mmask8) __U);
5445}
5446
5447extern __inline __m128i
5448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5450{
5451 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5452 __imm,
5453 (__v4si)
0b192937 5454 _mm_undefined_si128 (),
756c5857
AI
5455 (__mmask8) -1);
5456}
5457
5458extern __inline __m128i
5459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5461 const int __imm)
5462{
5463 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5464 __imm,
5465 (__v4si) __W,
5466 (__mmask8) __U);
5467}
5468
5469extern __inline __m128i
5470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5471_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5472{
5473 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5474 __imm,
5475 (__v4si)
5476 _mm_setzero_si128 (),
5477 (__mmask8) __U);
5478}
5479#else
5480
5481#define _mm512_extractf64x4_pd(X, C) \
5482 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5483 (int) (C),\
0b192937 5484 (__v4df)(__m256d)_mm256_undefined_pd(),\
756c5857
AI
5485 (__mmask8)-1))
5486
5487#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5488 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5489 (int) (C),\
5490 (__v4df)(__m256d)(W),\
5491 (__mmask8)(U)))
5492
5493#define _mm512_maskz_extractf64x4_pd(U, X, C) \
5494 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5495 (int) (C),\
5496 (__v4df)(__m256d)_mm256_setzero_pd(),\
5497 (__mmask8)(U)))
5498
/* Macro forms of the extractf32x4 intrinsics.  X is the 512-bit source,
   C the selector immediate; W/U are the pass-through vector and write
   mask of the masked variants.  */
#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
                                              (int) (C),                \
                                              (__v4sf)(__m128)_mm_undefined_ps(),\
                                              (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
                                              (int) (C),                \
                                              (__v4sf)(__m128)(W),      \
                                              (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
                                              (int) (C),                \
                                              (__v4sf)(__m128)_mm_setzero_ps(),\
                                              (__mmask8)(U)))
5516
/* Macro forms of the extracti64x4 intrinsics.  X is the 512-bit source,
   C the selector immediate; W/U are the pass-through vector and write
   mask of the masked variants.  */
#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
                                               (int) (C),               \
                                               (__v4di)(__m256i)_mm256_undefined_si256 (),\
                                               (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
                                               (int) (C),               \
                                               (__v4di)(__m256i)(W),    \
                                               (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
                                               (int) (C),               \
                                               (__v4di)(__m256i)_mm256_setzero_si256 (),\
                                               (__mmask8)(U)))
5534
5535#define _mm512_extracti32x4_epi32(X, C) \
5536 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5537 (int) (C),\
0b192937 5538 (__v4si)(__m128i)_mm_undefined_si128 (),\
756c5857
AI
5539 (__mmask8)-1))
5540
5541#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5542 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5543 (int) (C),\
5544 (__v4si)(__m128i)(W),\
5545 (__mmask8)(U)))
5546
5547#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5548 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5549 (int) (C),\
5550 (__v4si)(__m128i)_mm_setzero_si128 (),\
5551 (__mmask8)(U)))
5552#endif
5553
5554#ifdef __OPTIMIZE__
5555extern __inline __m512i
5556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5557_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5558{
5559 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5560 (__v4si) __B,
5561 __imm,
5562 (__v16si) __A, -1);
5563}
5564
5565extern __inline __m512
5566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5567_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5568{
5569 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5570 (__v4sf) __B,
5571 __imm,
5572 (__v16sf) __A, -1);
5573}
5574
5575extern __inline __m512i
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5578{
5579 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5580 (__v4di) __B,
5581 __imm,
5582 (__v8di)
4271e5cb 5583 _mm512_undefined_epi32 (),
756c5857
AI
5584 (__mmask8) -1);
5585}
5586
5587extern __inline __m512i
5588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5589_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5590 __m256i __B, const int __imm)
5591{
5592 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5593 (__v4di) __B,
5594 __imm,
5595 (__v8di) __W,
5596 (__mmask8) __U);
5597}
5598
5599extern __inline __m512i
5600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5602 const int __imm)
5603{
5604 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5605 (__v4di) __B,
5606 __imm,
5607 (__v8di)
5608 _mm512_setzero_si512 (),
5609 (__mmask8) __U);
5610}
5611
5612extern __inline __m512d
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5615{
5616 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5617 (__v4df) __B,
5618 __imm,
5619 (__v8df)
0b192937 5620 _mm512_undefined_pd (),
756c5857
AI
5621 (__mmask8) -1);
5622}
5623
5624extern __inline __m512d
5625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5626_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5627 __m256d __B, const int __imm)
5628{
5629 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5630 (__v4df) __B,
5631 __imm,
5632 (__v8df) __W,
5633 (__mmask8) __U);
5634}
5635
5636extern __inline __m512d
5637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5638_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5639 const int __imm)
5640{
5641 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5642 (__v4df) __B,
5643 __imm,
5644 (__v8df)
5645 _mm512_setzero_pd (),
5646 (__mmask8) __U);
5647}
5648#else
/* Macro form of _mm512_insertf32x4 (the #else branch of the enclosing
   conditional).  The merge operand is an undefined vector instead of a
   second expansion of X, so an argument with side effects is evaluated
   only once.  The mask is all ones, the same combination the
   _mm512_insertf64x4 macro in this branch uses.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512)_mm512_undefined_ps(),                             \
    (__mmask16)(-1)))
5652
/* Macro form of _mm512_inserti32x4 (the #else branch of the enclosing
   conditional).  The merge operand is an undefined vector instead of a
   second expansion of X, so an argument with side effects is evaluated
   only once.  The mask is all ones, the same combination the
   _mm512_inserti64x4 macro in this branch uses.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),                        \
    (__mmask16)(-1)))
5656
/* Macro form of _mm512_insertf64x4: undefined merge operand, all-ones
   mask.  */
#define _mm512_insertf64x4(SRC, INS, IMM)                               \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df)(__m512d) (SRC),                                           \
     (__v4df)(__m256d) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)-1))
5662
/* Macro form of _mm512_mask_insertf64x4: MERGE is the merge operand and
   MASK the mask handed to the builtin.  */
#define _mm512_mask_insertf64x4(MERGE, MASK, SRC, INS, IMM)             \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df)(__m512d) (SRC),                                           \
     (__v4df)(__m256d) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8df)(__m512d)(MERGE),                                          \
     (__mmask8)(MASK)))
5668
/* Macro form of _mm512_maskz_insertf64x4: an all-zero vector is the merge
   operand and MASK the mask handed to the builtin.  */
#define _mm512_maskz_insertf64x4(MASK, SRC, INS, IMM)                   \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df)(__m512d) (SRC),                                           \
     (__v4df)(__m256d) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(MASK)))
5674
/* Macro form of _mm512_inserti64x4: undefined merge operand, all-ones
   mask.  */
#define _mm512_inserti64x4(SRC, INS, IMM)                               \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di)(__m512i) (SRC),                                           \
     (__v4di)(__m256i) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),                        \
     (__mmask8)-1))
5680
/* Macro form of _mm512_mask_inserti64x4: MERGE is the merge operand and
   MASK the mask handed to the builtin.  */
#define _mm512_mask_inserti64x4(MERGE, MASK, SRC, INS, IMM)             \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di)(__m512i) (SRC),                                           \
     (__v4di)(__m256i) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8di)(__m512i)(MERGE),                                          \
     (__mmask8)(MASK)))
5686
/* Macro form of _mm512_maskz_inserti64x4: an all-zero vector is the merge
   operand and MASK the mask handed to the builtin.  */
#define _mm512_maskz_inserti64x4(MASK, SRC, INS, IMM)                   \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di)(__m512i) (SRC),                                           \
     (__v4di)(__m256i) (INS),                                           \
     (int) (IMM),                                                       \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                          \
     (__mmask8)(MASK)))
5692#endif
5693
5694extern __inline __m512d
5695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696_mm512_loadu_pd (void const *__P)
5697{
c6b0037d 5698 return *(__m512d_u *)__P;
756c5857
AI
5699}
5700
5701extern __inline __m512d
5702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5703_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5704{
fc9cf6da 5705 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
5706 (__v8df) __W,
5707 (__mmask8) __U);
5708}
5709
5710extern __inline __m512d
5711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5713{
fc9cf6da 5714 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
5715 (__v8df)
5716 _mm512_setzero_pd (),
5717 (__mmask8) __U);
5718}
5719
5720extern __inline void
5721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722_mm512_storeu_pd (void *__P, __m512d __A)
5723{
c6b0037d 5724 *(__m512d_u *)__P = __A;
756c5857
AI
5725}
5726
5727extern __inline void
5728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5730{
fc9cf6da 5731 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
5732 (__mmask8) __U);
5733}
5734
5735extern __inline __m512
5736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737_mm512_loadu_ps (void const *__P)
5738{
c6b0037d 5739 return *(__m512_u *)__P;
756c5857
AI
5740}
5741
5742extern __inline __m512
5743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5744_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5745{
fc9cf6da 5746 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
5747 (__v16sf) __W,
5748 (__mmask16) __U);
5749}
5750
5751extern __inline __m512
5752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5753_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5754{
fc9cf6da 5755 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
5756 (__v16sf)
5757 _mm512_setzero_ps (),
5758 (__mmask16) __U);
5759}
5760
5761extern __inline void
5762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763_mm512_storeu_ps (void *__P, __m512 __A)
5764{
c6b0037d 5765 *(__m512_u *)__P = __A;
756c5857
AI
5766}
5767
5768extern __inline void
5769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5770_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5771{
fc9cf6da 5772 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
5773 (__mmask16) __U);
5774}
5775
5776extern __inline __m512i
5777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5779{
fc9cf6da 5780 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
5781 (__v8di) __W,
5782 (__mmask8) __U);
5783}
5784
5785extern __inline __m512i
5786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5787_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5788{
fc9cf6da 5789 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
5790 (__v8di)
5791 _mm512_setzero_si512 (),
5792 (__mmask8) __U);
5793}
5794
5795extern __inline void
5796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5797_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5798{
fc9cf6da 5799 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
5800 (__mmask8) __U);
5801}
5802
5803extern __inline __m512i
5804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 5805_mm512_loadu_si512 (void const *__P)
756c5857 5806{
c6b0037d 5807 return *(__m512i_u *)__P;
756c5857
AI
5808}
5809
5810extern __inline __m512i
5811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5813{
fc9cf6da 5814 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
5815 (__v16si) __W,
5816 (__mmask16) __U);
5817}
5818
5819extern __inline __m512i
5820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5822{
fc9cf6da 5823 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
5824 (__v16si)
5825 _mm512_setzero_si512 (),
5826 (__mmask16) __U);
5827}
5828
5829extern __inline void
5830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 5831_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 5832{
c6b0037d 5833 *(__m512i_u *)__P = __A;
756c5857
AI
5834}
5835
5836extern __inline void
5837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5838_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5839{
fc9cf6da 5840 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
5841 (__mmask16) __U);
5842}
5843
5844extern __inline __m512d
5845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846_mm512_permutevar_pd (__m512d __A, __m512i __C)
5847{
5848 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5849 (__v8di) __C,
5850 (__v8df)
0b192937 5851 _mm512_undefined_pd (),
756c5857
AI
5852 (__mmask8) -1);
5853}
5854
5855extern __inline __m512d
5856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5857_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5858{
5859 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5860 (__v8di) __C,
5861 (__v8df) __W,
5862 (__mmask8) __U);
5863}
5864
5865extern __inline __m512d
5866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5868{
5869 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5870 (__v8di) __C,
5871 (__v8df)
5872 _mm512_setzero_pd (),
5873 (__mmask8) __U);
5874}
5875
5876extern __inline __m512
5877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878_mm512_permutevar_ps (__m512 __A, __m512i __C)
5879{
5880 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5881 (__v16si) __C,
5882 (__v16sf)
0b192937 5883 _mm512_undefined_ps (),
756c5857
AI
5884 (__mmask16) -1);
5885}
5886
5887extern __inline __m512
5888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5890{
5891 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5892 (__v16si) __C,
5893 (__v16sf) __W,
5894 (__mmask16) __U);
5895}
5896
5897extern __inline __m512
5898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5899_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5900{
5901 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5902 (__v16si) __C,
5903 (__v16sf)
5904 _mm512_setzero_ps (),
5905 (__mmask16) __U);
5906}
5907
5908extern __inline __m512i
5909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5910_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5911{
5912 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913 /* idx */ ,
5914 (__v8di) __A,
5915 (__v8di) __B,
5916 (__mmask8) -1);
5917}
5918
5919extern __inline __m512i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5922 __m512i __B)
5923{
5924 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5925 /* idx */ ,
5926 (__v8di) __A,
5927 (__v8di) __B,
5928 (__mmask8) __U);
5929}
5930
5931extern __inline __m512i
5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5934 __mmask8 __U, __m512i __B)
5935{
5936 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5937 (__v8di) __I
5938 /* idx */ ,
5939 (__v8di) __B,
5940 (__mmask8) __U);
5941}
5942
5943extern __inline __m512i
5944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5946 __m512i __I, __m512i __B)
5947{
5948 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5949 /* idx */ ,
5950 (__v8di) __A,
5951 (__v8di) __B,
5952 (__mmask8) __U);
5953}
5954
5955extern __inline __m512i
5956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5957_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5958{
5959 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960 /* idx */ ,
5961 (__v16si) __A,
5962 (__v16si) __B,
5963 (__mmask16) -1);
5964}
5965
5966extern __inline __m512i
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5969 __m512i __I, __m512i __B)
5970{
5971 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5972 /* idx */ ,
5973 (__v16si) __A,
5974 (__v16si) __B,
5975 (__mmask16) __U);
5976}
5977
5978extern __inline __m512i
5979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5981 __mmask16 __U, __m512i __B)
5982{
5983 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5984 (__v16si) __I
5985 /* idx */ ,
5986 (__v16si) __B,
5987 (__mmask16) __U);
5988}
5989
5990extern __inline __m512i
5991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5993 __m512i __I, __m512i __B)
5994{
5995 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5996 /* idx */ ,
5997 (__v16si) __A,
5998 (__v16si) __B,
5999 (__mmask16) __U);
6000}
6001
6002extern __inline __m512d
6003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6005{
6006 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007 /* idx */ ,
6008 (__v8df) __A,
6009 (__v8df) __B,
6010 (__mmask8) -1);
6011}
6012
6013extern __inline __m512d
6014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6016 __m512d __B)
6017{
6018 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6019 /* idx */ ,
6020 (__v8df) __A,
6021 (__v8df) __B,
6022 (__mmask8) __U);
6023}
6024
6025extern __inline __m512d
6026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6028 __m512d __B)
6029{
6030 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6031 (__v8di) __I
6032 /* idx */ ,
6033 (__v8df) __B,
6034 (__mmask8) __U);
6035}
6036
6037extern __inline __m512d
6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6040 __m512d __B)
6041{
6042 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6043 /* idx */ ,
6044 (__v8df) __A,
6045 (__v8df) __B,
6046 (__mmask8) __U);
6047}
6048
6049extern __inline __m512
6050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6052{
6053 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6054 /* idx */ ,
6055 (__v16sf) __A,
6056 (__v16sf) __B,
6057 (__mmask16) -1);
6058}
6059
6060extern __inline __m512
6061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6063{
6064 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6065 /* idx */ ,
6066 (__v16sf) __A,
6067 (__v16sf) __B,
6068 (__mmask16) __U);
6069}
6070
6071extern __inline __m512
6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6074 __m512 __B)
6075{
6076 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6077 (__v16si) __I
6078 /* idx */ ,
6079 (__v16sf) __B,
6080 (__mmask16) __U);
6081}
6082
6083extern __inline __m512
6084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6086 __m512 __B)
6087{
6088 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6089 /* idx */ ,
6090 (__v16sf) __A,
6091 (__v16sf) __B,
6092 (__mmask16) __U);
6093}
6094
6095#ifdef __OPTIMIZE__
6096extern __inline __m512d
6097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098_mm512_permute_pd (__m512d __X, const int __C)
6099{
6100 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6101 (__v8df)
0b192937 6102 _mm512_undefined_pd (),
756c5857
AI
6103 (__mmask8) -1);
6104}
6105
6106extern __inline __m512d
6107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6109{
6110 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6111 (__v8df) __W,
6112 (__mmask8) __U);
6113}
6114
6115extern __inline __m512d
6116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6118{
6119 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6120 (__v8df)
6121 _mm512_setzero_pd (),
6122 (__mmask8) __U);
6123}
6124
6125extern __inline __m512
6126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6127_mm512_permute_ps (__m512 __X, const int __C)
6128{
6129 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6130 (__v16sf)
0b192937 6131 _mm512_undefined_ps (),
756c5857
AI
6132 (__mmask16) -1);
6133}
6134
6135extern __inline __m512
6136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6138{
6139 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6140 (__v16sf) __W,
6141 (__mmask16) __U);
6142}
6143
6144extern __inline __m512
6145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6146_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6147{
6148 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6149 (__v16sf)
6150 _mm512_setzero_ps (),
6151 (__mmask16) __U);
6152}
6153#else
/* Macro form used when __OPTIMIZE__ is not defined: undefined merge
   operand, all-ones mask.  */
#define _mm512_permute_pd(SRC, IMM)                                     \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)(-1)))
6158
/* Macro form used when __OPTIMIZE__ is not defined: MERGE is the merge
   operand, MASK the mask.  */
#define _mm512_mask_permute_pd(MERGE, MASK, SRC, IMM)                   \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)(MERGE),                                          \
     (__mmask8)(MASK)))
6163
/* Macro form used when __OPTIMIZE__ is not defined: all-zero merge
   operand, MASK the mask.  */
#define _mm512_maskz_permute_pd(MASK, SRC, IMM)                         \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(MASK)))
6168
/* Macro form used when __OPTIMIZE__ is not defined: undefined merge
   operand, all-ones mask.  */
#define _mm512_permute_ps(SRC, IMM)                                     \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf)(__m512)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)_mm512_undefined_ps(),                            \
     (__mmask16)(-1)))
6173
/* Macro form used when __OPTIMIZE__ is not defined: MERGE is the merge
   operand, MASK the mask.  */
#define _mm512_mask_permute_ps(MERGE, MASK, SRC, IMM)                   \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf)(__m512)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)(MERGE),                                          \
     (__mmask16)(MASK)))
6178
/* Macro form used when __OPTIMIZE__ is not defined: all-zero merge
   operand, MASK the mask.  */
#define _mm512_maskz_permute_ps(MASK, SRC, IMM)                         \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf)(__m512)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)_mm512_setzero_ps(),                              \
     (__mmask16)(MASK)))
6183#endif
6184
6185#ifdef __OPTIMIZE__
6186extern __inline __m512i
6187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188_mm512_permutex_epi64 (__m512i __X, const int __I)
6189{
6190 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6191 (__v8di)
4271e5cb 6192 _mm512_undefined_epi32 (),
756c5857
AI
6193 (__mmask8) (-1));
6194}
6195
6196extern __inline __m512i
6197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6199 __m512i __X, const int __I)
6200{
6201 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6202 (__v8di) __W,
6203 (__mmask8) __M);
6204}
6205
6206extern __inline __m512i
6207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6208_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6209{
6210 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6211 (__v8di)
6212 _mm512_setzero_si512 (),
6213 (__mmask8) __M);
6214}
6215
6216extern __inline __m512d
6217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218_mm512_permutex_pd (__m512d __X, const int __M)
6219{
6220 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6221 (__v8df)
0b192937 6222 _mm512_undefined_pd (),
756c5857
AI
6223 (__mmask8) -1);
6224}
6225
6226extern __inline __m512d
6227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6229{
6230 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6231 (__v8df) __W,
6232 (__mmask8) __U);
6233}
6234
6235extern __inline __m512d
6236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6238{
6239 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6240 (__v8df)
6241 _mm512_setzero_pd (),
6242 (__mmask8) __U);
6243}
6244#else
/* Macro form used when __OPTIMIZE__ is not defined: undefined merge
   operand, all-ones mask.  */
#define _mm512_permutex_pd(SRC, IMM)                                    \
  ((__m512d) __builtin_ia32_permdf512_mask (                            \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)-1))
756c5857
AI
6249
/* Macro form used when __OPTIMIZE__ is not defined: MERGE is the merge
   operand, MASK the mask.  */
#define _mm512_mask_permutex_pd(MERGE, MASK, SRC, IMM)                  \
  ((__m512d) __builtin_ia32_permdf512_mask (                            \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)(MERGE),                                          \
     (__mmask8)(MASK)))
6253
/* Macro form used when __OPTIMIZE__ is not defined: all-zero merge
   operand, MASK the mask.  */
#define _mm512_maskz_permutex_pd(MASK, SRC, IMM)                        \
  ((__m512d) __builtin_ia32_permdf512_mask (                            \
     (__v8df)(__m512d)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(MASK)))
6258
/* Macro form used when __OPTIMIZE__ is not defined: undefined merge
   operand, all-ones mask.  */
#define _mm512_permutex_epi64(SRC, IMM)                                 \
  ((__m512i) __builtin_ia32_permdi512_mask (                            \
     (__v8di)(__m512i)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8di)(__m512i) (_mm512_undefined_epi32 ()),                     \
     (__mmask8)(-1)))
6265
/* Macro form used when __OPTIMIZE__ is not defined: all-zero merge
   operand, MASK the mask.  */
#define _mm512_maskz_permutex_epi64(MASK, SRC, IMM)                     \
  ((__m512i) __builtin_ia32_permdi512_mask (                            \
     (__v8di)(__m512i)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()),                       \
     (__mmask8)(MASK)))
6272
/* Macro form used when __OPTIMIZE__ is not defined: MERGE is the merge
   operand, MASK the mask.  */
#define _mm512_mask_permutex_epi64(MERGE, MASK, SRC, IMM)               \
  ((__m512i) __builtin_ia32_permdi512_mask (                            \
     (__v8di)(__m512i)(SRC),                                            \
     (int)(IMM),                                                        \
     (__v8di)(__m512i)(MERGE),                                          \
     (__mmask8)(MASK)))
6278#endif
6279
6280extern __inline __m512i
6281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6283{
583a9919
KY
6284 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6285 (__v8di) __X,
756c5857
AI
6286 (__v8di)
6287 _mm512_setzero_si512 (),
6288 __M);
6289}
6290
6291extern __inline __m512i
6292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6293_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6294{
583a9919
KY
6295 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296 (__v8di) __X,
756c5857 6297 (__v8di)
4271e5cb 6298 _mm512_undefined_epi32 (),
756c5857
AI
6299 (__mmask8) -1);
6300}
6301
6302extern __inline __m512i
6303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6305 __m512i __Y)
6306{
583a9919
KY
6307 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6308 (__v8di) __X,
756c5857
AI
6309 (__v8di) __W,
6310 __M);
6311}
6312
6313extern __inline __m512i
6314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6316{
583a9919
KY
6317 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6318 (__v16si) __X,
756c5857
AI
6319 (__v16si)
6320 _mm512_setzero_si512 (),
6321 __M);
6322}
6323
6324extern __inline __m512i
6325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6326_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6327{
583a9919
KY
6328 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329 (__v16si) __X,
756c5857 6330 (__v16si)
4271e5cb 6331 _mm512_undefined_epi32 (),
756c5857
AI
6332 (__mmask16) -1);
6333}
6334
6335extern __inline __m512i
6336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6337_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6338 __m512i __Y)
6339{
583a9919
KY
6340 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6341 (__v16si) __X,
756c5857
AI
6342 (__v16si) __W,
6343 __M);
6344}
6345
6346extern __inline __m512d
6347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6348_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6349{
6350 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6351 (__v8di) __X,
6352 (__v8df)
0b192937 6353 _mm512_undefined_pd (),
756c5857
AI
6354 (__mmask8) -1);
6355}
6356
6357extern __inline __m512d
6358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6359_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6360{
6361 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6362 (__v8di) __X,
6363 (__v8df) __W,
6364 (__mmask8) __U);
6365}
6366
6367extern __inline __m512d
6368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6370{
6371 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6372 (__v8di) __X,
6373 (__v8df)
6374 _mm512_setzero_pd (),
6375 (__mmask8) __U);
6376}
6377
6378extern __inline __m512
6379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6380_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6381{
6382 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6383 (__v16si) __X,
6384 (__v16sf)
0b192937 6385 _mm512_undefined_ps (),
756c5857
AI
6386 (__mmask16) -1);
6387}
6388
6389extern __inline __m512
6390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6391_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6392{
6393 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6394 (__v16si) __X,
6395 (__v16sf) __W,
6396 (__mmask16) __U);
6397}
6398
6399extern __inline __m512
6400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6402{
6403 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6404 (__v16si) __X,
6405 (__v16sf)
6406 _mm512_setzero_ps (),
6407 (__mmask16) __U);
6408}
6409
6410#ifdef __OPTIMIZE__
6411extern __inline __m512
6412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6413_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6414{
6415 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416 (__v16sf) __V, __imm,
6417 (__v16sf)
0b192937 6418 _mm512_undefined_ps (),
756c5857
AI
6419 (__mmask16) -1);
6420}
6421
6422extern __inline __m512
6423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6425 __m512 __V, const int __imm)
6426{
6427 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6428 (__v16sf) __V, __imm,
6429 (__v16sf) __W,
6430 (__mmask16) __U);
6431}
6432
6433extern __inline __m512
6434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6436{
6437 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6438 (__v16sf) __V, __imm,
6439 (__v16sf)
6440 _mm512_setzero_ps (),
6441 (__mmask16) __U);
6442}
6443
6444extern __inline __m512d
6445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6447{
6448 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449 (__v8df) __V, __imm,
6450 (__v8df)
0b192937 6451 _mm512_undefined_pd (),
756c5857
AI
6452 (__mmask8) -1);
6453}
6454
6455extern __inline __m512d
6456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6458 __m512d __V, const int __imm)
6459{
6460 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6461 (__v8df) __V, __imm,
6462 (__v8df) __W,
6463 (__mmask8) __U);
6464}
6465
6466extern __inline __m512d
6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6469 const int __imm)
6470{
6471 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6472 (__v8df) __V, __imm,
6473 (__v8df)
6474 _mm512_setzero_pd (),
6475 (__mmask8) __U);
6476}
6477
6478extern __inline __m512d
6479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6481 const int __imm, const int __R)
6482{
6483 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484 (__v8df) __B,
6485 (__v8di) __C,
6486 __imm,
6487 (__mmask8) -1, __R);
6488}
6489
6490extern __inline __m512d
6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6493 __m512i __C, const int __imm, const int __R)
6494{
6495 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6496 (__v8df) __B,
6497 (__v8di) __C,
6498 __imm,
6499 (__mmask8) __U, __R);
6500}
6501
6502extern __inline __m512d
6503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6505 __m512i __C, const int __imm, const int __R)
6506{
6507 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6508 (__v8df) __B,
6509 (__v8di) __C,
6510 __imm,
6511 (__mmask8) __U, __R);
6512}
6513
6514extern __inline __m512
6515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6517 const int __imm, const int __R)
6518{
6519 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520 (__v16sf) __B,
6521 (__v16si) __C,
6522 __imm,
6523 (__mmask16) -1, __R);
6524}
6525
6526extern __inline __m512
6527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6529 __m512i __C, const int __imm, const int __R)
6530{
6531 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6532 (__v16sf) __B,
6533 (__v16si) __C,
6534 __imm,
6535 (__mmask16) __U, __R);
6536}
6537
6538extern __inline __m512
6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6541 __m512i __C, const int __imm, const int __R)
6542{
6543 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6544 (__v16sf) __B,
6545 (__v16si) __C,
6546 __imm,
6547 (__mmask16) __U, __R);
6548}
6549
6550extern __inline __m128d
6551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6552_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6553 const int __imm, const int __R)
6554{
6555 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6556 (__v2df) __B,
6557 (__v2di) __C, __imm,
6558 (__mmask8) -1, __R);
6559}
6560
6561extern __inline __m128d
6562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6563_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6564 __m128i __C, const int __imm, const int __R)
6565{
6566 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6567 (__v2df) __B,
6568 (__v2di) __C, __imm,
6569 (__mmask8) __U, __R);
6570}
6571
6572extern __inline __m128d
6573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6575 __m128i __C, const int __imm, const int __R)
6576{
6577 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6578 (__v2df) __B,
6579 (__v2di) __C,
6580 __imm,
6581 (__mmask8) __U, __R);
6582}
6583
6584extern __inline __m128
6585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6586_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6587 const int __imm, const int __R)
6588{
6589 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6590 (__v4sf) __B,
6591 (__v4si) __C, __imm,
6592 (__mmask8) -1, __R);
6593}
6594
6595extern __inline __m128
6596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6597_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6598 __m128i __C, const int __imm, const int __R)
6599{
6600 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6601 (__v4sf) __B,
6602 (__v4si) __C, __imm,
6603 (__mmask8) __U, __R);
6604}
6605
6606extern __inline __m128
6607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6608_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6609 __m128i __C, const int __imm, const int __R)
6610{
6611 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6612 (__v4sf) __B,
6613 (__v4si) __C, __imm,
6614 (__mmask8) __U, __R);
6615}
6616
6617#else
/* Macro forms of the 512-bit double shuffle.  Each expands to one call of
   __builtin_ia32_shufpd512_mask with IMM cast to int.  */
#define _mm512_shuffle_pd(A, B, IMM)                                    \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),       \
                                            (__v8df)(__m512d)(B),       \
                                            (int)(IMM),                 \
                                            (__v8df)(__m512d)           \
                                            _mm512_undefined_pd (),     \
                                            (__mmask8) -1))

#define _mm512_mask_shuffle_pd(SRC, K, A, B, IMM)                       \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),       \
                                            (__v8df)(__m512d)(B),       \
                                            (int)(IMM),                 \
                                            (__v8df)(__m512d)(SRC),     \
                                            (__mmask8)(K)))

#define _mm512_maskz_shuffle_pd(K, A, B, IMM)                           \
  ((__m512d) __builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),       \
                                            (__v8df)(__m512d)(B),       \
                                            (int)(IMM),                 \
                                            (__v8df)(__m512d)           \
                                            _mm512_setzero_pd (),       \
                                            (__mmask8)(K)))
6635
/* Macro forms of the 512-bit float shuffle.  Each expands to one call of
   __builtin_ia32_shufps512_mask with IMM cast to int.  */
#define _mm512_shuffle_ps(A, B, IMM)                                    \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),        \
                                           (__v16sf)(__m512)(B),        \
                                           (int)(IMM),                  \
                                           (__v16sf)(__m512)            \
                                           _mm512_undefined_ps (),      \
                                           (__mmask16) -1))

#define _mm512_mask_shuffle_ps(SRC, K, A, B, IMM)                       \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),        \
                                           (__v16sf)(__m512)(B),        \
                                           (int)(IMM),                  \
                                           (__v16sf)(__m512)(SRC),      \
                                           (__mmask16)(K)))

#define _mm512_maskz_shuffle_ps(K, A, B, IMM)                           \
  ((__m512) __builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),        \
                                           (__v16sf)(__m512)(B),        \
                                           (int)(IMM),                  \
                                           (__v16sf)(__m512)            \
                                           _mm512_setzero_ps (),        \
                                           (__mmask16)(K)))
6653
/* Macro forms of the packed-double fixupimm intrinsics.  The plain and
   mask variants expand to __builtin_ia32_fixupimmpd512_mask, the maskz
   variant to __builtin_ia32_fixupimmpd512_maskz.  */
#define _mm512_fixupimm_round_pd(A, B, T, IMM, RND)                     \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),   \
                                                (__v8df)(__m512d)(B),   \
                                                (__v8di)(__m512i)(T),   \
                                                (int)(IMM),             \
                                                (__mmask8)(-1), (RND)))

#define _mm512_mask_fixupimm_round_pd(A, K, B, T, IMM, RND)             \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),   \
                                                (__v8df)(__m512d)(B),   \
                                                (__v8di)(__m512i)(T),   \
                                                (int)(IMM),             \
                                                (__mmask8)(K), (RND)))

#define _mm512_maskz_fixupimm_round_pd(K, A, B, T, IMM, RND)            \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(A),  \
                                                 (__v8df)(__m512d)(B),  \
                                                 (__v8di)(__m512i)(T),  \
                                                 (int)(IMM),            \
                                                 (__mmask8)(K), (RND)))
6668
/* Macro forms of the packed-float fixupimm intrinsics.  The plain and
   mask variants expand to __builtin_ia32_fixupimmps512_mask, the maskz
   variant to __builtin_ia32_fixupimmps512_maskz.  */
#define _mm512_fixupimm_round_ps(A, B, T, IMM, RND)                     \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),    \
                                               (__v16sf)(__m512)(B),    \
                                               (__v16si)(__m512i)(T),   \
                                               (int)(IMM),              \
                                               (__mmask16)(-1), (RND)))

#define _mm512_mask_fixupimm_round_ps(A, K, B, T, IMM, RND)             \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),    \
                                               (__v16sf)(__m512)(B),    \
                                               (__v16si)(__m512i)(T),   \
                                               (int)(IMM),              \
                                               (__mmask16)(K), (RND)))

#define _mm512_maskz_fixupimm_round_ps(K, A, B, T, IMM, RND)            \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(A),   \
                                                (__v16sf)(__m512)(B),   \
                                                (__v16si)(__m512i)(T),  \
                                                (int)(IMM),             \
                                                (__mmask16)(K), (RND)))
6683
/* Macro forms of the scalar-double fixupimm intrinsics.  The plain and
   mask variants expand to __builtin_ia32_fixupimmsd_mask, the maskz
   variant to __builtin_ia32_fixupimmsd_maskz.  */
#define _mm_fixupimm_round_sd(A, B, T, IMM, RND)                        \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),      \
                                             (__v2df)(__m128d)(B),      \
                                             (__v2di)(__m128i)(T),      \
                                             (int)(IMM),                \
                                             (__mmask8)(-1), (RND)))

#define _mm_mask_fixupimm_round_sd(A, K, B, T, IMM, RND)                \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),      \
                                             (__v2df)(__m128d)(B),      \
                                             (__v2di)(__m128i)(T),      \
                                             (int)(IMM),                \
                                             (__mmask8)(K), (RND)))

#define _mm_maskz_fixupimm_round_sd(K, A, B, T, IMM, RND)               \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(A),     \
                                              (__v2df)(__m128d)(B),     \
                                              (__v2di)(__m128i)(T),     \
                                              (int)(IMM),               \
                                              (__mmask8)(K), (RND)))
6698
/* Macro forms of the scalar-float fixupimm intrinsics.  The plain and
   mask variants expand to __builtin_ia32_fixupimmss_mask, the maskz
   variant to __builtin_ia32_fixupimmss_maskz.  */
#define _mm_fixupimm_round_ss(A, B, T, IMM, RND)                        \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),        \
                                            (__v4sf)(__m128)(B),        \
                                            (__v4si)(__m128i)(T),       \
                                            (int)(IMM),                 \
                                            (__mmask8)(-1), (RND)))

#define _mm_mask_fixupimm_round_ss(A, K, B, T, IMM, RND)                \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),        \
                                            (__v4sf)(__m128)(B),        \
                                            (__v4si)(__m128i)(T),       \
                                            (int)(IMM),                 \
                                            (__mmask8)(K), (RND)))

#define _mm_maskz_fixupimm_round_ss(K, A, B, T, IMM, RND)               \
  ((__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(A),       \
                                             (__v4sf)(__m128)(B),       \
                                             (__v4si)(__m128i)(T),      \
                                             (int)(IMM),                \
                                             (__mmask8)(K), (RND)))
6713#endif
6714
6715extern __inline __m512
6716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6717_mm512_movehdup_ps (__m512 __A)
6718{
6719 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6720 (__v16sf)
0b192937 6721 _mm512_undefined_ps (),
756c5857
AI
6722 (__mmask16) -1);
6723}
6724
6725extern __inline __m512
6726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6728{
6729 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6730 (__v16sf) __W,
6731 (__mmask16) __U);
6732}
6733
6734extern __inline __m512
6735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6736_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6737{
6738 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6739 (__v16sf)
6740 _mm512_setzero_ps (),
6741 (__mmask16) __U);
6742}
6743
6744extern __inline __m512
6745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746_mm512_moveldup_ps (__m512 __A)
6747{
6748 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6749 (__v16sf)
0b192937 6750 _mm512_undefined_ps (),
756c5857
AI
6751 (__mmask16) -1);
6752}
6753
6754extern __inline __m512
6755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6756_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6757{
6758 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6759 (__v16sf) __W,
6760 (__mmask16) __U);
6761}
6762
6763extern __inline __m512
6764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6765_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6766{
6767 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6768 (__v16sf)
6769 _mm512_setzero_ps (),
6770 (__mmask16) __U);
6771}
6772
6773extern __inline __m512i
6774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6775_mm512_or_si512 (__m512i __A, __m512i __B)
6776{
2069d6fc 6777 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
6778}
6779
6780extern __inline __m512i
6781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782_mm512_or_epi32 (__m512i __A, __m512i __B)
6783{
2069d6fc 6784 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
6785}
6786
6787extern __inline __m512i
6788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6789_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6790{
6791 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6792 (__v16si) __B,
6793 (__v16si) __W,
6794 (__mmask16) __U);
6795}
6796
6797extern __inline __m512i
6798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6800{
6801 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6802 (__v16si) __B,
6803 (__v16si)
6804 _mm512_setzero_si512 (),
6805 (__mmask16) __U);
6806}
6807
6808extern __inline __m512i
6809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6810_mm512_or_epi64 (__m512i __A, __m512i __B)
6811{
2069d6fc 6812 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
6813}
6814
6815extern __inline __m512i
6816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6817_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6818{
6819 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6820 (__v8di) __B,
6821 (__v8di) __W,
6822 (__mmask8) __U);
6823}
6824
6825extern __inline __m512i
6826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6827_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6828{
6829 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6830 (__v8di) __B,
6831 (__v8di)
6832 _mm512_setzero_si512 (),
6833 (__mmask8) __U);
6834}
6835
6836extern __inline __m512i
6837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6838_mm512_xor_si512 (__m512i __A, __m512i __B)
6839{
2069d6fc 6840 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
6841}
6842
6843extern __inline __m512i
6844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845_mm512_xor_epi32 (__m512i __A, __m512i __B)
6846{
2069d6fc 6847 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
6848}
6849
6850extern __inline __m512i
6851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6853{
6854 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6855 (__v16si) __B,
6856 (__v16si) __W,
6857 (__mmask16) __U);
6858}
6859
6860extern __inline __m512i
6861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6863{
6864 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6865 (__v16si) __B,
6866 (__v16si)
6867 _mm512_setzero_si512 (),
6868 (__mmask16) __U);
6869}
6870
6871extern __inline __m512i
6872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873_mm512_xor_epi64 (__m512i __A, __m512i __B)
6874{
2069d6fc 6875 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
6876}
6877
6878extern __inline __m512i
6879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6881{
6882 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6883 (__v8di) __B,
6884 (__v8di) __W,
6885 (__mmask8) __U);
6886}
6887
6888extern __inline __m512i
6889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6891{
6892 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6893 (__v8di) __B,
6894 (__v8di)
6895 _mm512_setzero_si512 (),
6896 (__mmask8) __U);
6897}
6898
6899#ifdef __OPTIMIZE__
6900extern __inline __m512i
6901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6902_mm512_rol_epi32 (__m512i __A, const int __B)
6903{
6904 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6905 (__v16si)
4271e5cb 6906 _mm512_undefined_epi32 (),
756c5857
AI
6907 (__mmask16) -1);
6908}
6909
6910extern __inline __m512i
6911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6912_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6913{
6914 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6915 (__v16si) __W,
6916 (__mmask16) __U);
6917}
6918
6919extern __inline __m512i
6920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6922{
6923 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6924 (__v16si)
6925 _mm512_setzero_si512 (),
6926 (__mmask16) __U);
6927}
6928
6929extern __inline __m512i
6930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6931_mm512_ror_epi32 (__m512i __A, int __B)
6932{
6933 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6934 (__v16si)
4271e5cb 6935 _mm512_undefined_epi32 (),
756c5857
AI
6936 (__mmask16) -1);
6937}
6938
6939extern __inline __m512i
6940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6941_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6942{
6943 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6944 (__v16si) __W,
6945 (__mmask16) __U);
6946}
6947
6948extern __inline __m512i
6949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6950_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6951{
6952 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6953 (__v16si)
6954 _mm512_setzero_si512 (),
6955 (__mmask16) __U);
6956}
6957
6958extern __inline __m512i
6959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6960_mm512_rol_epi64 (__m512i __A, const int __B)
6961{
6962 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6963 (__v8di)
4271e5cb 6964 _mm512_undefined_epi32 (),
756c5857
AI
6965 (__mmask8) -1);
6966}
6967
6968extern __inline __m512i
6969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6970_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6971{
6972 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6973 (__v8di) __W,
6974 (__mmask8) __U);
6975}
6976
6977extern __inline __m512i
6978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6979_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6980{
6981 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6982 (__v8di)
6983 _mm512_setzero_si512 (),
6984 (__mmask8) __U);
6985}
6986
6987extern __inline __m512i
6988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6989_mm512_ror_epi64 (__m512i __A, int __B)
6990{
6991 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6992 (__v8di)
4271e5cb 6993 _mm512_undefined_epi32 (),
756c5857
AI
6994 (__mmask8) -1);
6995}
6996
6997extern __inline __m512i
6998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7000{
7001 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7002 (__v8di) __W,
7003 (__mmask8) __U);
7004}
7005
7006extern __inline __m512i
7007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7008_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7009{
7010 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7011 (__v8di)
7012 _mm512_setzero_si512 (),
7013 (__mmask8) __U);
7014}
7015
7016#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_prold512_mask with the count N cast to int.  */
#define _mm512_rol_epi32(V, N)                                          \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)                    \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(SRC, K, V, N)                             \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)(__m512i)(SRC),     \
                                           (__mmask16)(K)))
#define _mm512_maskz_rol_epi32(K, V, N)                                 \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)                    \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask16)(K)))
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_prord512_mask with the count N cast to int.  */
#define _mm512_ror_epi32(V, N)                                          \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)                    \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(SRC, K, V, N)                             \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)(__m512i)(SRC),     \
                                           (__mmask16)(K)))
#define _mm512_maskz_ror_epi32(K, V, N)                                 \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),       \
                                           (int)(N),                    \
                                           (__v16si)                    \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask16)(K)))
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_prolq512_mask with the count N cast to int.  */
#define _mm512_rol_epi64(V, N)                                          \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)                     \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(SRC, K, V, N)                             \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)(__m512i)(SRC),      \
                                           (__mmask8)(K)))
#define _mm512_maskz_rol_epi64(K, V, N)                                 \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)                     \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask8)(K)))
7062
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to one
   call of __builtin_ia32_prorq512_mask with the count N cast to int.  */
#define _mm512_ror_epi64(V, N)                                          \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)                     \
                                           _mm512_undefined_epi32 (),   \
                                           (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(SRC, K, V, N)                             \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)(__m512i)(SRC),      \
                                           (__mmask8)(K)))
#define _mm512_maskz_ror_epi64(K, V, N)                                 \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),        \
                                           (int)(N),                    \
                                           (__v8di)                     \
                                           _mm512_setzero_si512 (),     \
                                           (__mmask8)(K)))
7078#endif
7079
7080extern __inline __m512i
7081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7082_mm512_and_si512 (__m512i __A, __m512i __B)
7083{
2069d6fc 7084 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7085}
7086
7087extern __inline __m512i
7088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089_mm512_and_epi32 (__m512i __A, __m512i __B)
7090{
2069d6fc 7091 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7092}
7093
7094extern __inline __m512i
7095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7097{
7098 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7099 (__v16si) __B,
7100 (__v16si) __W,
7101 (__mmask16) __U);
7102}
7103
7104extern __inline __m512i
7105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7107{
7108 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7109 (__v16si) __B,
7110 (__v16si)
7111 _mm512_setzero_si512 (),
7112 (__mmask16) __U);
7113}
7114
7115extern __inline __m512i
7116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117_mm512_and_epi64 (__m512i __A, __m512i __B)
7118{
2069d6fc 7119 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7120}
7121
7122extern __inline __m512i
7123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7125{
7126 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7127 (__v8di) __B,
7128 (__v8di) __W, __U);
7129}
7130
7131extern __inline __m512i
7132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7133_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7134{
7135 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7136 (__v8di) __B,
7137 (__v8di)
7138 _mm512_setzero_pd (),
7139 __U);
7140}
7141
7142extern __inline __m512i
7143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7144_mm512_andnot_si512 (__m512i __A, __m512i __B)
7145{
7146 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7147 (__v16si) __B,
7148 (__v16si)
4271e5cb 7149 _mm512_undefined_epi32 (),
756c5857
AI
7150 (__mmask16) -1);
7151}
7152
7153extern __inline __m512i
7154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7155_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7156{
7157 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7158 (__v16si) __B,
7159 (__v16si)
4271e5cb 7160 _mm512_undefined_epi32 (),
756c5857
AI
7161 (__mmask16) -1);
7162}
7163
7164extern __inline __m512i
7165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7167{
7168 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7169 (__v16si) __B,
7170 (__v16si) __W,
7171 (__mmask16) __U);
7172}
7173
7174extern __inline __m512i
7175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7177{
7178 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7179 (__v16si) __B,
7180 (__v16si)
7181 _mm512_setzero_si512 (),
7182 (__mmask16) __U);
7183}
7184
7185extern __inline __m512i
7186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7188{
7189 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7190 (__v8di) __B,
7191 (__v8di)
4271e5cb 7192 _mm512_undefined_epi32 (),
756c5857
AI
7193 (__mmask8) -1);
7194}
7195
7196extern __inline __m512i
7197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7198_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7199{
7200 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7201 (__v8di) __B,
7202 (__v8di) __W, __U);
7203}
7204
7205extern __inline __m512i
7206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7207_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7208{
7209 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7210 (__v8di) __B,
7211 (__v8di)
7212 _mm512_setzero_pd (),
7213 __U);
7214}
7215
7216extern __inline __mmask16
7217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7218_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7219{
7220 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7221 (__v16si) __B,
7222 (__mmask16) -1);
7223}
7224
7225extern __inline __mmask16
7226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7227_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7228{
7229 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7230 (__v16si) __B, __U);
7231}
7232
7233extern __inline __mmask8
7234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7236{
7237 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7238 (__v8di) __B,
7239 (__mmask8) -1);
7240}
7241
7242extern __inline __mmask8
7243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7245{
7246 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7247}
7248
260d3642
IT
7249extern __inline __mmask16
7250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7252{
7253 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7254 (__v16si) __B,
7255 (__mmask16) -1);
7256}
7257
7258extern __inline __mmask16
7259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7260_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7261{
7262 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7263 (__v16si) __B, __U);
7264}
7265
7266extern __inline __mmask8
7267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7268_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7269{
7270 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7271 (__v8di) __B,
7272 (__mmask8) -1);
7273}
7274
7275extern __inline __mmask8
7276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7277_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7278{
7279 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7280 (__v8di) __B, __U);
7281}
7282
dcb2c527
JJ
7283extern __inline __m512
7284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7285_mm512_abs_ps (__m512 __A)
7286{
7287 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7288 _mm512_set1_epi32 (0x7fffffff));
7289}
7290
7291extern __inline __m512
7292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7294{
7295 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7296 _mm512_set1_epi32 (0x7fffffff));
7297}
7298
7299extern __inline __m512d
7300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7301_mm512_abs_pd (__m512 __A)
7302{
7303 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7304 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7305}
7306
7307extern __inline __m512d
7308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7309_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A)
7310{
7311 return (__m512d)
7312 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7313 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7314}
7315
756c5857
AI
7316extern __inline __m512i
7317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7318_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7319{
7320 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7321 (__v16si) __B,
7322 (__v16si)
4271e5cb 7323 _mm512_undefined_epi32 (),
756c5857
AI
7324 (__mmask16) -1);
7325}
7326
7327extern __inline __m512i
7328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7329_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7330 __m512i __B)
7331{
7332 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7333 (__v16si) __B,
7334 (__v16si) __W,
7335 (__mmask16) __U);
7336}
7337
7338extern __inline __m512i
7339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7340_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7341{
7342 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7343 (__v16si) __B,
7344 (__v16si)
7345 _mm512_setzero_si512 (),
7346 (__mmask16) __U);
7347}
7348
7349extern __inline __m512i
7350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7351_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7352{
7353 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7354 (__v8di) __B,
7355 (__v8di)
4271e5cb 7356 _mm512_undefined_epi32 (),
756c5857
AI
7357 (__mmask8) -1);
7358}
7359
7360extern __inline __m512i
7361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7362_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7363{
7364 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7365 (__v8di) __B,
7366 (__v8di) __W,
7367 (__mmask8) __U);
7368}
7369
7370extern __inline __m512i
7371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7373{
7374 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7375 (__v8di) __B,
7376 (__v8di)
7377 _mm512_setzero_si512 (),
7378 (__mmask8) __U);
7379}
7380
7381extern __inline __m512i
7382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7383_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7384{
7385 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7386 (__v16si) __B,
7387 (__v16si)
4271e5cb 7388 _mm512_undefined_epi32 (),
756c5857
AI
7389 (__mmask16) -1);
7390}
7391
7392extern __inline __m512i
7393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7394_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7395 __m512i __B)
7396{
7397 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7398 (__v16si) __B,
7399 (__v16si) __W,
7400 (__mmask16) __U);
7401}
7402
7403extern __inline __m512i
7404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7406{
7407 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7408 (__v16si) __B,
7409 (__v16si)
7410 _mm512_setzero_si512 (),
7411 (__mmask16) __U);
7412}
7413
7414extern __inline __m512i
7415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7416_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7417{
7418 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7419 (__v8di) __B,
7420 (__v8di)
4271e5cb 7421 _mm512_undefined_epi32 (),
756c5857
AI
7422 (__mmask8) -1);
7423}
7424
7425extern __inline __m512i
7426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7427_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7428{
7429 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7430 (__v8di) __B,
7431 (__v8di) __W,
7432 (__mmask8) __U);
7433}
7434
7435extern __inline __m512i
7436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7437_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7438{
7439 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7440 (__v8di) __B,
7441 (__v8di)
7442 _mm512_setzero_si512 (),
7443 (__mmask8) __U);
7444}
7445
7446#ifdef __x86_64__
7447#ifdef __OPTIMIZE__
7448extern __inline unsigned long long
7449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7451{
7452 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7453}
7454
7455extern __inline long long
7456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7457_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7458{
7459 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7460}
7461
7462extern __inline long long
7463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7464_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7465{
7466 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7467}
7468
7469extern __inline unsigned long long
7470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7471_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7472{
7473 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7474}
7475
7476extern __inline long long
7477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7478_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7479{
7480 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7481}
7482
7483extern __inline long long
7484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7485_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7486{
7487 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7488}
7489#else
/* Macro forms of the scalar float -> 64-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  V and R are handed to the builtin as
   written, without casts.  */
#define _mm_cvt_roundss_u64(V, R)                               \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (V, R))

#define _mm_cvt_roundss_si64(V, R)                              \
  ((long long) __builtin_ia32_vcvtss2si64 (V, R))

#define _mm_cvt_roundss_i64(V, R)                               \
  ((long long) __builtin_ia32_vcvtss2si64 (V, R))

#define _mm_cvtt_roundss_u64(V, R)                              \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (V, R))

#define _mm_cvtt_roundss_i64(V, R)                              \
  ((long long) __builtin_ia32_vcvttss2si64 (V, R))

#define _mm_cvtt_roundss_si64(V, R)                             \
  ((long long) __builtin_ia32_vcvttss2si64 (V, R))
7507#endif
7508#endif
7509
7510#ifdef __OPTIMIZE__
7511extern __inline unsigned
7512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7513_mm_cvt_roundss_u32 (__m128 __A, const int __R)
7514{
7515 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7516}
7517
7518extern __inline int
7519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7520_mm_cvt_roundss_si32 (__m128 __A, const int __R)
7521{
7522 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7523}
7524
7525extern __inline int
7526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527_mm_cvt_roundss_i32 (__m128 __A, const int __R)
7528{
7529 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7530}
7531
7532extern __inline unsigned
7533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7534_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7535{
7536 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7537}
7538
7539extern __inline int
7540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7542{
7543 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7544}
7545
7546extern __inline int
7547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7549{
7550 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7551}
7552#else
/* Macro forms of the _mm_cvt[t]_roundss_*32 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to one builtin call with both
   arguments passed through unchanged.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
7570#endif
7571
7572#ifdef __x86_64__
7573#ifdef __OPTIMIZE__
7574extern __inline unsigned long long
7575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7577{
7578 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7579}
7580
7581extern __inline long long
7582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7583_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7584{
7585 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7586}
7587
7588extern __inline long long
7589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7590_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7591{
7592 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7593}
7594
7595extern __inline unsigned long long
7596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7598{
7599 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7600}
7601
7602extern __inline long long
7603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7605{
7606 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7607}
7608
7609extern __inline long long
7610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7611_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7612{
7613 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7614}
7615#else
/* Macro forms of the _mm_cvt[t]_roundsd_*64 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to one builtin call with both
   arguments passed through unchanged.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7633#endif
7634#endif
7635
7636#ifdef __OPTIMIZE__
7637extern __inline unsigned
7638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7640{
7641 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7642}
7643
7644extern __inline int
7645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7646_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7647{
7648 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7649}
7650
7651extern __inline int
7652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7653_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7654{
7655 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7656}
7657
7658extern __inline unsigned
7659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7661{
7662 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7663}
7664
7665extern __inline int
7666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7667_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7668{
7669 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7670}
7671
7672extern __inline int
7673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7675{
7676 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7677}
7678#else
/* Macro forms of the _mm_cvt[t]_roundsd_*32 intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to one builtin call with both
   arguments passed through unchanged.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7696#endif
7697
7698extern __inline __m512d
7699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7700_mm512_movedup_pd (__m512d __A)
7701{
7702 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7703 (__v8df)
0b192937 7704 _mm512_undefined_pd (),
756c5857
AI
7705 (__mmask8) -1);
7706}
7707
7708extern __inline __m512d
7709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7710_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7711{
7712 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7713 (__v8df) __W,
7714 (__mmask8) __U);
7715}
7716
7717extern __inline __m512d
7718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7720{
7721 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7722 (__v8df)
7723 _mm512_setzero_pd (),
7724 (__mmask8) __U);
7725}
7726
7727extern __inline __m512d
7728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7729_mm512_unpacklo_pd (__m512d __A, __m512d __B)
7730{
7731 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7732 (__v8df) __B,
7733 (__v8df)
0b192937 7734 _mm512_undefined_pd (),
756c5857
AI
7735 (__mmask8) -1);
7736}
7737
7738extern __inline __m512d
7739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7740_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7741{
7742 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7743 (__v8df) __B,
7744 (__v8df) __W,
7745 (__mmask8) __U);
7746}
7747
7748extern __inline __m512d
7749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7750_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7751{
7752 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7753 (__v8df) __B,
7754 (__v8df)
7755 _mm512_setzero_pd (),
7756 (__mmask8) __U);
7757}
7758
7759extern __inline __m512d
7760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7761_mm512_unpackhi_pd (__m512d __A, __m512d __B)
7762{
7763 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7764 (__v8df) __B,
7765 (__v8df)
0b192937 7766 _mm512_undefined_pd (),
756c5857
AI
7767 (__mmask8) -1);
7768}
7769
7770extern __inline __m512d
7771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7772_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7773{
7774 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7775 (__v8df) __B,
7776 (__v8df) __W,
7777 (__mmask8) __U);
7778}
7779
7780extern __inline __m512d
7781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7782_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7783{
7784 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7785 (__v8df) __B,
7786 (__v8df)
7787 _mm512_setzero_pd (),
7788 (__mmask8) __U);
7789}
7790
7791extern __inline __m512
7792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793_mm512_unpackhi_ps (__m512 __A, __m512 __B)
7794{
7795 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7796 (__v16sf) __B,
7797 (__v16sf)
0b192937 7798 _mm512_undefined_ps (),
756c5857
AI
7799 (__mmask16) -1);
7800}
7801
7802extern __inline __m512
7803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7805{
7806 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7807 (__v16sf) __B,
7808 (__v16sf) __W,
7809 (__mmask16) __U);
7810}
7811
7812extern __inline __m512
7813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7814_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7815{
7816 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7817 (__v16sf) __B,
7818 (__v16sf)
7819 _mm512_setzero_ps (),
7820 (__mmask16) __U);
7821}
7822
7823#ifdef __OPTIMIZE__
7824extern __inline __m512d
7825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7826_mm512_cvt_roundps_pd (__m256 __A, const int __R)
7827{
7828 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7829 (__v8df)
0b192937 7830 _mm512_undefined_pd (),
756c5857
AI
7831 (__mmask8) -1, __R);
7832}
7833
7834extern __inline __m512d
7835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7836_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7837 const int __R)
7838{
7839 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7840 (__v8df) __W,
7841 (__mmask8) __U, __R);
7842}
7843
7844extern __inline __m512d
7845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7846_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7847{
7848 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7849 (__v8df)
7850 _mm512_setzero_pd (),
7851 (__mmask8) __U, __R);
7852}
7853
7854extern __inline __m512
7855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7856_mm512_cvt_roundph_ps (__m256i __A, const int __R)
7857{
7858 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7859 (__v16sf)
0b192937 7860 _mm512_undefined_ps (),
756c5857
AI
7861 (__mmask16) -1, __R);
7862}
7863
7864extern __inline __m512
7865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7866_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7867 const int __R)
7868{
7869 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7870 (__v16sf) __W,
7871 (__mmask16) __U, __R);
7872}
7873
7874extern __inline __m512
7875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7876_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7877{
7878 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7879 (__v16sf)
7880 _mm512_setzero_ps (),
7881 (__mmask16) __U, __R);
7882}
7883
7884extern __inline __m256i
7885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886_mm512_cvt_roundps_ph (__m512 __A, const int __I)
7887{
7888 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7889 __I,
7890 (__v16hi)
0b192937 7891 _mm256_undefined_si256 (),
756c5857
AI
7892 -1);
7893}
7894
7895extern __inline __m256i
7896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7897_mm512_cvtps_ph (__m512 __A, const int __I)
7898{
7899 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7900 __I,
7901 (__v16hi)
0b192937 7902 _mm256_undefined_si256 (),
756c5857
AI
7903 -1);
7904}
7905
7906extern __inline __m256i
7907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7908_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7909 const int __I)
7910{
7911 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7912 __I,
7913 (__v16hi) __U,
7914 (__mmask16) __W);
7915}
7916
7917extern __inline __m256i
7918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7919_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7920{
7921 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7922 __I,
7923 (__v16hi) __U,
7924 (__mmask16) __W);
7925}
7926
7927extern __inline __m256i
7928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7929_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7930{
7931 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7932 __I,
7933 (__v16hi)
7934 _mm256_setzero_si256 (),
7935 (__mmask16) __W);
7936}
7937
7938extern __inline __m256i
7939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7940_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7941{
7942 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7943 __I,
7944 (__v16hi)
7945 _mm256_setzero_si256 (),
7946 (__mmask16) __W);
7947}
7948#else
/* Macro forms of the ps<->pd / ph<->ps conversion intrinsics, used when
   __OPTIMIZE__ is not defined.

   Every expansion is wrapped in an outer pair of parentheses and every
   macro parameter is parenthesized at its point of use.  Without the outer
   parentheses a postfix operator applied to the macro result (for example
   a subscript) would bind to the builtin call rather than to the cast, and
   a cast written directly in front of an unparenthesized parameter would
   apply only to the first operand of a compound argument.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
      (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
      (__v16sf) _mm512_setzero_ps (), (U), (B)))

#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

/* In the masked ps->ph forms U is the __m256i vector operand and W is the
   mask, mirroring the parameter naming of the inline versions.  */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7985#endif
7986
7987#ifdef __OPTIMIZE__
7988extern __inline __m256
7989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7990_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7991{
7992 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7993 (__v8sf)
0b192937 7994 _mm256_undefined_ps (),
756c5857
AI
7995 (__mmask8) -1, __R);
7996}
7997
7998extern __inline __m256
7999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8001 const int __R)
8002{
8003 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8004 (__v8sf) __W,
8005 (__mmask8) __U, __R);
8006}
8007
8008extern __inline __m256
8009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8011{
8012 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8013 (__v8sf)
8014 _mm256_setzero_ps (),
8015 (__mmask8) __U, __R);
8016}
8017
075691af
AI
8018extern __inline __m128
8019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8020_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8021{
8022 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8023 (__v2df) __B,
8024 __R);
8025}
8026
8027extern __inline __m128d
8028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8029_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8030{
8031 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8032 (__v4sf) __B,
8033 __R);
8034}
756c5857
AI
8035#else
/* Macro forms of the pd->ps and scalar sd<->ss conversion intrinsics, used
   when __OPTIMIZE__ is not defined.

   Each expansion is enclosed in parentheses so that the leading cast
   applies to the whole builtin call even when the macro result is followed
   by a postfix operator, and each parameter is parenthesized where it is
   used.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
      (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
756c5857
AI
8050#endif
8051
8052extern __inline void
8053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054_mm512_stream_si512 (__m512i * __P, __m512i __A)
8055{
8056 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8057}
8058
8059extern __inline void
8060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8061_mm512_stream_ps (float *__P, __m512 __A)
8062{
8063 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8064}
8065
8066extern __inline void
8067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068_mm512_stream_pd (double *__P, __m512d __A)
8069{
8070 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8071}
8072
c56a42b9
KY
8073extern __inline __m512i
8074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075_mm512_stream_load_si512 (void *__P)
8076{
8077 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8078}
8079
9c3c2608
UB
/* Constants for mantissa extraction.  The enumerator values are the same
   ones the implicit numbering produces; they are spelled out because the
   getmant wrappers combine them arithmetically as (sign << 2) | norm.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8095
756c5857 8096#ifdef __OPTIMIZE__
075691af
AI
8097extern __inline __m128
8098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8099_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8100{
8101 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8102 (__v4sf) __B,
8103 __R);
8104}
8105
8106extern __inline __m128d
8107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8108_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8109{
8110 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8111 (__v2df) __B,
8112 __R);
8113}
8114
756c5857
AI
8115extern __inline __m512
8116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117_mm512_getexp_round_ps (__m512 __A, const int __R)
8118{
8119 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8120 (__v16sf)
0b192937 8121 _mm512_undefined_ps (),
756c5857
AI
8122 (__mmask16) -1, __R);
8123}
8124
8125extern __inline __m512
8126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8128 const int __R)
8129{
8130 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8131 (__v16sf) __W,
8132 (__mmask16) __U, __R);
8133}
8134
8135extern __inline __m512
8136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8137_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8138{
8139 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8140 (__v16sf)
8141 _mm512_setzero_ps (),
8142 (__mmask16) __U, __R);
8143}
8144
8145extern __inline __m512d
8146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8147_mm512_getexp_round_pd (__m512d __A, const int __R)
8148{
8149 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8150 (__v8df)
0b192937 8151 _mm512_undefined_pd (),
756c5857
AI
8152 (__mmask8) -1, __R);
8153}
8154
8155extern __inline __m512d
8156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8157_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8158 const int __R)
8159{
8160 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8161 (__v8df) __W,
8162 (__mmask8) __U, __R);
8163}
8164
8165extern __inline __m512d
8166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8167_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8168{
8169 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8170 (__v8df)
8171 _mm512_setzero_pd (),
8172 (__mmask8) __U, __R);
8173}
8174
756c5857
AI
8175extern __inline __m512d
8176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8177_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8178 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8179{
8180 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8181 (__C << 2) | __B,
0b192937 8182 _mm512_undefined_pd (),
756c5857
AI
8183 (__mmask8) -1, __R);
8184}
8185
8186extern __inline __m512d
8187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8188_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8189 _MM_MANTISSA_NORM_ENUM __B,
8190 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8191{
8192 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8193 (__C << 2) | __B,
8194 (__v8df) __W, __U,
8195 __R);
8196}
8197
8198extern __inline __m512d
8199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8201 _MM_MANTISSA_NORM_ENUM __B,
8202 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8203{
8204 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8205 (__C << 2) | __B,
8206 (__v8df)
8207 _mm512_setzero_pd (),
8208 __U, __R);
8209}
8210
8211extern __inline __m512
8212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8213_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8214 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8215{
8216 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8217 (__C << 2) | __B,
0b192937 8218 _mm512_undefined_ps (),
756c5857
AI
8219 (__mmask16) -1, __R);
8220}
8221
8222extern __inline __m512
8223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8224_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8225 _MM_MANTISSA_NORM_ENUM __B,
8226 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8227{
8228 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8229 (__C << 2) | __B,
8230 (__v16sf) __W, __U,
8231 __R);
8232}
8233
8234extern __inline __m512
8235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8236_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8237 _MM_MANTISSA_NORM_ENUM __B,
8238 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8239{
8240 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8241 (__C << 2) | __B,
8242 (__v16sf)
8243 _mm512_setzero_ps (),
8244 __U, __R);
8245}
8246
075691af
AI
8247extern __inline __m128d
8248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249_mm_getmant_round_sd (__m128d __A, __m128d __B,
8250 _MM_MANTISSA_NORM_ENUM __C,
8251 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8252{
8253 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8254 (__v2df) __B,
8255 (__D << 2) | __C,
8256 __R);
8257}
8258
8259extern __inline __m128
8260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8261_mm_getmant_round_ss (__m128 __A, __m128 __B,
8262 _MM_MANTISSA_NORM_ENUM __C,
8263 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8264{
8265 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8266 (__v4sf) __B,
8267 (__D << 2) | __C,
8268 __R);
8269}
8270
756c5857
AI
8271#else
/* Macro forms of the 512-bit getmant intrinsics, used when __OPTIMIZE__ is
   not defined.  B is the normalization selector and C the sign selector;
   they are combined into one immediate as ((C) << 2) | (B).  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_undefined_pd (), \
      (__mmask8) -1, \
      (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) (W), \
      (__mmask8) (U), \
      (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_setzero_pd (), \
      (__mmask8) (U), \
      (R)))

#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_undefined_ps (), \
      (__mmask16) -1, \
      (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) (W), \
      (__mmask16) (U), \
      (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), \
      (R)))
075691af
AI
/* Macro forms of the scalar getmant / getexp intrinsics.  For getmant, C
   is the normalization selector and D the sign selector, combined into one
   immediate as ((D) << 2) | (C).  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), \
      (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), \
      (R)))

#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
      (__v4sf) (__m128) (B), (R)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
      (__v2df) (__m128d) (B), (R)))
8329
756c5857
AI
/* Macro forms of the 512-bit getexp intrinsics.  The unmasked variants
   pass an undefined vector and an all-ones mask, the mask variants pass W
   and U, and the maskz variants pass a zero vector and U.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), (__mmask16) (U), (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), (__mmask8) (U), (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
8353#endif
8354
8355#ifdef __OPTIMIZE__
8356extern __inline __m512
8357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8358_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8359{
8360 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
8361 (__v16sf)
8362 _mm512_undefined_ps (),
8363 -1, __R);
756c5857
AI
8364}
8365
8366extern __inline __m512
8367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8368_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8369 const int __imm, const int __R)
8370{
8371 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8372 (__v16sf) __A,
8373 (__mmask16) __B, __R);
8374}
8375
8376extern __inline __m512
8377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8378_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8379 const int __imm, const int __R)
8380{
8381 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8382 __imm,
8383 (__v16sf)
8384 _mm512_setzero_ps (),
8385 (__mmask16) __A, __R);
8386}
8387
8388extern __inline __m512d
8389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8391{
8392 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
8393 (__v8df)
8394 _mm512_undefined_pd (),
8395 -1, __R);
756c5857
AI
8396}
8397
8398extern __inline __m512d
8399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8401 __m512d __C, const int __imm, const int __R)
8402{
8403 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8404 (__v8df) __A,
8405 (__mmask8) __B, __R);
8406}
8407
8408extern __inline __m512d
8409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8410_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8411 const int __imm, const int __R)
8412{
8413 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8414 __imm,
8415 (__v8df)
8416 _mm512_setzero_pd (),
8417 (__mmask8) __A, __R);
8418}
075691af
AI
8419
8420extern __inline __m128
8421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8423{
8424 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8425 (__v4sf) __B, __imm, __R);
8426}
8427
8428extern __inline __m128d
8429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8430_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8431 const int __R)
8432{
8433 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8434 (__v2df) __B, __imm, __R);
8435}
8436
756c5857
AI
8437#else
/* Macro forms of the roundscale intrinsics, used when __OPTIMIZE__ is not
   defined.  In the mask variants A is the vector passed as third operand,
   B the mask and C the source; in the maskz variants A is the mask and B
   the source.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
      (int) (B), \
      (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) (-1), (R)))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
      (int) (D), \
      (__v16sf) (__m512) (A), \
      (__mmask16) (B), (R)))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
      (int) (C), \
      (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (A), (R)))

#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
      (int) (B), \
      (__v8df) _mm512_undefined_pd (), \
      (__mmask8) (-1), (R)))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
      (int) (D), \
      (__v8df) (__m512d) (A), \
      (__mmask8) (B), (R)))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
      (int) (C), \
      (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (A), (R)))

#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (A), \
      (__v4sf) (__m128) (B), (int) (C), (R)))

#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (A), \
      (__v2df) (__m128d) (B), (int) (C), (R)))
756c5857
AI
8470#endif
8471
8472extern __inline __m512
8473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8474_mm512_floor_ps (__m512 __A)
8475{
8476 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8477 _MM_FROUND_FLOOR,
8478 (__v16sf) __A, -1,
8479 _MM_FROUND_CUR_DIRECTION);
8480}
8481
8482extern __inline __m512d
8483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8484_mm512_floor_pd (__m512d __A)
8485{
8486 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8487 _MM_FROUND_FLOOR,
8488 (__v8df) __A, -1,
8489 _MM_FROUND_CUR_DIRECTION);
8490}
8491
8492extern __inline __m512
8493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494_mm512_ceil_ps (__m512 __A)
8495{
8496 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8497 _MM_FROUND_CEIL,
8498 (__v16sf) __A, -1,
8499 _MM_FROUND_CUR_DIRECTION);
8500}
8501
8502extern __inline __m512d
8503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8504_mm512_ceil_pd (__m512d __A)
8505{
8506 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8507 _MM_FROUND_CEIL,
8508 (__v8df) __A, -1,
8509 _MM_FROUND_CUR_DIRECTION);
8510}
8511
8512extern __inline __m512
8513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8514_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8515{
8516 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8517 _MM_FROUND_FLOOR,
8518 (__v16sf) __W, __U,
8519 _MM_FROUND_CUR_DIRECTION);
8520}
8521
8522extern __inline __m512d
8523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8525{
8526 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8527 _MM_FROUND_FLOOR,
8528 (__v8df) __W, __U,
8529 _MM_FROUND_CUR_DIRECTION);
8530}
8531
8532extern __inline __m512
8533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8534_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8535{
8536 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8537 _MM_FROUND_CEIL,
8538 (__v16sf) __W, __U,
8539 _MM_FROUND_CUR_DIRECTION);
8540}
8541
8542extern __inline __m512d
8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8545{
8546 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8547 _MM_FROUND_CEIL,
8548 (__v8df) __W, __U,
8549 _MM_FROUND_CUR_DIRECTION);
8550}
8551
756c5857 8552#ifdef __OPTIMIZE__
756c5857
AI
8553extern __inline __m512i
8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8556{
8557 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8558 (__v16si) __B, __imm,
8559 (__v16si)
4271e5cb 8560 _mm512_undefined_epi32 (),
756c5857
AI
8561 (__mmask16) -1);
8562}
8563
8564extern __inline __m512i
8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8567 __m512i __B, const int __imm)
8568{
8569 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8570 (__v16si) __B, __imm,
8571 (__v16si) __W,
8572 (__mmask16) __U);
8573}
8574
8575extern __inline __m512i
8576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8578 const int __imm)
8579{
8580 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8581 (__v16si) __B, __imm,
8582 (__v16si)
8583 _mm512_setzero_si512 (),
8584 (__mmask16) __U);
8585}
8586
8587extern __inline __m512i
8588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8590{
8591 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8592 (__v8di) __B, __imm,
8593 (__v8di)
4271e5cb 8594 _mm512_undefined_epi32 (),
756c5857
AI
8595 (__mmask8) -1);
8596}
8597
8598extern __inline __m512i
8599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8600_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8601 __m512i __B, const int __imm)
8602{
8603 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8604 (__v8di) __B, __imm,
8605 (__v8di) __W,
8606 (__mmask8) __U);
8607}
8608
8609extern __inline __m512i
8610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8611_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8612 const int __imm)
8613{
8614 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8615 (__v8di) __B, __imm,
8616 (__v8di)
8617 _mm512_setzero_si512 (),
8618 (__mmask8) __U);
8619}
8620#else
756c5857
AI
/* Macro forms of the 32-bit alignr intrinsics.  Each expands to
   __builtin_ia32_alignd512_mask; the variants differ in pass-through
   operand (undefined vector, SRC, or zero vector) and mask (all ones
   or K).  */
#define _mm512_alignr_epi32(A, B, IMM)                                  \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),     \
        (__v16si)(__m512i)(B), (int)(IMM),                              \
        (__v16si)_mm512_undefined_epi32 (),                             \
        (__mmask16)-1))

#define _mm512_mask_alignr_epi32(SRC, K, A, B, IMM)                     \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),     \
        (__v16si)(__m512i)(B), (int)(IMM),                              \
        (__v16si)(__m512i)(SRC),                                        \
        (__mmask16)(K)))

#define _mm512_maskz_alignr_epi32(K, A, B, IMM)                         \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),     \
        (__v16si)(__m512i)(B), (int)(IMM),                              \
        (__v16si)_mm512_setzero_si512 (),                               \
        (__mmask16)(K)))
8635
/* Macro forms of the 64-bit alignr intrinsics.  Each expands to
   __builtin_ia32_alignq512_mask; the variants differ in pass-through
   operand (undefined vector, SRC, or zero vector) and mask (all ones
   or K).  */
#define _mm512_alignr_epi64(A, B, IMM)                                  \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),      \
        (__v8di)(__m512i)(B), (int)(IMM),                               \
        (__v8di)_mm512_undefined_epi32 (),                              \
        (__mmask8)-1))

#define _mm512_mask_alignr_epi64(SRC, K, A, B, IMM)                     \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),      \
        (__v8di)(__m512i)(B), (int)(IMM),                               \
        (__v8di)(__m512i)(SRC),                                         \
        (__mmask8)(K)))

#define _mm512_maskz_alignr_epi64(K, A, B, IMM)                         \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),      \
        (__v8di)(__m512i)(B), (int)(IMM),                               \
        (__v8di)_mm512_setzero_si512 (),                                \
        (__mmask8)(K)))
8649#endif
8650
8651extern __inline __mmask16
8652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8653_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8654{
8655 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8656 (__v16si) __B,
8657 (__mmask16) -1);
8658}
8659
8660extern __inline __mmask16
8661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8662_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8663{
8664 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8665 (__v16si) __B, __U);
8666}
8667
8668extern __inline __mmask8
8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8671{
8672 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8673 (__v8di) __B, __U);
8674}
8675
8676extern __inline __mmask8
8677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8678_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8679{
8680 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8681 (__v8di) __B,
8682 (__mmask8) -1);
8683}
8684
8685extern __inline __mmask16
8686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8687_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8688{
8689 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8690 (__v16si) __B,
8691 (__mmask16) -1);
8692}
8693
8694extern __inline __mmask16
8695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8696_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8697{
8698 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8699 (__v16si) __B, __U);
8700}
8701
8702extern __inline __mmask8
8703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8705{
8706 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8707 (__v8di) __B, __U);
8708}
8709
8710extern __inline __mmask8
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8713{
8714 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8715 (__v8di) __B,
8716 (__mmask8) -1);
8717}
8718
d256b866
IT
8719extern __inline __mmask16
8720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8722{
8723 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8724 (__v16si) __Y, 5,
8725 (__mmask16) -1);
8726}
8727
275be1da
IT
8728extern __inline __mmask16
8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8731{
8732 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8733 (__v16si) __Y, 5,
8734 (__mmask16) __M);
8735}
8736
8737extern __inline __mmask16
8738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8740{
8741 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8742 (__v16si) __Y, 5,
8743 (__mmask16) __M);
8744}
8745
d256b866
IT
8746extern __inline __mmask16
8747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8749{
8750 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8751 (__v16si) __Y, 5,
8752 (__mmask16) -1);
8753}
8754
275be1da
IT
8755extern __inline __mmask8
8756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8758{
8759 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8760 (__v8di) __Y, 5,
8761 (__mmask8) __M);
8762}
8763
d256b866
IT
8764extern __inline __mmask8
8765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8767{
8768 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8769 (__v8di) __Y, 5,
8770 (__mmask8) -1);
8771}
8772
275be1da
IT
8773extern __inline __mmask8
8774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8776{
8777 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8778 (__v8di) __Y, 5,
8779 (__mmask8) __M);
8780}
8781
d256b866
IT
8782extern __inline __mmask8
8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8785{
8786 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8787 (__v8di) __Y, 5,
8788 (__mmask8) -1);
8789}
8790
275be1da
IT
8791extern __inline __mmask16
8792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8794{
8795 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8796 (__v16si) __Y, 2,
8797 (__mmask16) __M);
8798}
8799
d256b866
IT
8800extern __inline __mmask16
8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8803{
8804 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8805 (__v16si) __Y, 2,
8806 (__mmask16) -1);
8807}
8808
275be1da
IT
8809extern __inline __mmask16
8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8812{
8813 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8814 (__v16si) __Y, 2,
8815 (__mmask16) __M);
8816}
8817
d256b866
IT
8818extern __inline __mmask16
8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8821{
8822 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8823 (__v16si) __Y, 2,
8824 (__mmask16) -1);
8825}
8826
275be1da
IT
8827extern __inline __mmask8
8828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8830{
8831 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8832 (__v8di) __Y, 2,
8833 (__mmask8) __M);
8834}
8835
d256b866
IT
8836extern __inline __mmask8
8837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8839{
8840 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8841 (__v8di) __Y, 2,
8842 (__mmask8) -1);
8843}
8844
275be1da
IT
8845extern __inline __mmask8
8846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8848{
8849 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8850 (__v8di) __Y, 2,
8851 (__mmask8) __M);
8852}
8853
d256b866
IT
8854extern __inline __mmask8
8855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8857{
8858 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8859 (__v8di) __Y, 2,
8860 (__mmask8) -1);
8861}
8862
275be1da
IT
8863extern __inline __mmask16
8864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8866{
8867 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8868 (__v16si) __Y, 1,
8869 (__mmask16) __M);
8870}
8871
d256b866
IT
8872extern __inline __mmask16
8873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8875{
8876 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8877 (__v16si) __Y, 1,
8878 (__mmask16) -1);
8879}
8880
275be1da
IT
8881extern __inline __mmask16
8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8884{
8885 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8886 (__v16si) __Y, 1,
8887 (__mmask16) __M);
8888}
8889
d256b866
IT
8890extern __inline __mmask16
8891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8893{
8894 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8895 (__v16si) __Y, 1,
8896 (__mmask16) -1);
8897}
8898
275be1da
IT
8899extern __inline __mmask8
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8902{
8903 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8904 (__v8di) __Y, 1,
8905 (__mmask8) __M);
8906}
8907
d256b866
IT
8908extern __inline __mmask8
8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8911{
8912 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8913 (__v8di) __Y, 1,
8914 (__mmask8) -1);
8915}
8916
275be1da
IT
8917extern __inline __mmask8
8918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8920{
8921 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8922 (__v8di) __Y, 1,
8923 (__mmask8) __M);
8924}
8925
d256b866
IT
8926extern __inline __mmask8
8927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8929{
8930 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8931 (__v8di) __Y, 1,
8932 (__mmask8) -1);
8933}
8934
8935extern __inline __mmask16
8936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8938{
8939 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8940 (__v16si) __Y, 4,
8941 (__mmask16) -1);
8942}
8943
275be1da
IT
8944extern __inline __mmask16
8945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8947{
8948 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8949 (__v16si) __Y, 4,
8950 (__mmask16) __M);
8951}
8952
8953extern __inline __mmask16
8954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8956{
8957 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8958 (__v16si) __Y, 4,
8959 (__mmask16) __M);
8960}
8961
d256b866
IT
8962extern __inline __mmask16
8963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8964_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8965{
8966 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8967 (__v16si) __Y, 4,
8968 (__mmask16) -1);
8969}
8970
275be1da
IT
8971extern __inline __mmask8
8972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8973_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8974{
8975 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8976 (__v8di) __Y, 4,
8977 (__mmask8) __M);
8978}
8979
d256b866
IT
8980extern __inline __mmask8
8981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8982_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8983{
8984 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8985 (__v8di) __Y, 4,
8986 (__mmask8) -1);
8987}
8988
275be1da
IT
8989extern __inline __mmask8
8990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8992{
8993 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8994 (__v8di) __Y, 4,
8995 (__mmask8) __M);
8996}
8997
d256b866
IT
8998extern __inline __mmask8
8999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9001{
9002 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9003 (__v8di) __Y, 4,
9004 (__mmask8) -1);
9005}
9006
756c5857
AI
/* Predicate immediates for the integer compare intrinsics in this
   header (the `P' argument of the _mm512_cmp_ep[iu]{32,64}_mask
   family).  NLT/GE share one encoding, as do NLE/GT; 0x3 is marked
   unused.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
9016
9017#ifdef __OPTIMIZE__
d8ea3e7c
AS
9018extern __inline __mmask16
9019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9020_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9021{
9022 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9023 (__mmask8) __B);
9024}
9025
9026extern __inline __mmask16
9027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9028_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9029{
9030 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9031 (__mmask8) __B);
9032}
9033
756c5857
AI
9034extern __inline __mmask8
9035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9037{
9038 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9039 (__v8di) __Y, __P,
9040 (__mmask8) -1);
9041}
9042
9043extern __inline __mmask16
9044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9046{
9047 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9048 (__v16si) __Y, __P,
9049 (__mmask16) -1);
9050}
9051
9052extern __inline __mmask8
9053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9054_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9055{
9056 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9057 (__v8di) __Y, __P,
9058 (__mmask8) -1);
9059}
9060
9061extern __inline __mmask16
9062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9063_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9064{
9065 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9066 (__v16si) __Y, __P,
9067 (__mmask16) -1);
9068}
9069
9070extern __inline __mmask8
9071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9073 const int __R)
9074{
9075 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9076 (__v8df) __Y, __P,
9077 (__mmask8) -1, __R);
9078}
9079
9080extern __inline __mmask16
9081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9082_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9083{
9084 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9085 (__v16sf) __Y, __P,
9086 (__mmask16) -1, __R);
9087}
9088
9089extern __inline __mmask8
9090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9091_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9092 const int __P)
9093{
9094 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9095 (__v8di) __Y, __P,
9096 (__mmask8) __U);
9097}
9098
9099extern __inline __mmask16
9100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9101_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9102 const int __P)
9103{
9104 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9105 (__v16si) __Y, __P,
9106 (__mmask16) __U);
9107}
9108
9109extern __inline __mmask8
9110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9111_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9112 const int __P)
9113{
9114 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9115 (__v8di) __Y, __P,
9116 (__mmask8) __U);
9117}
9118
9119extern __inline __mmask16
9120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9122 const int __P)
9123{
9124 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9125 (__v16si) __Y, __P,
9126 (__mmask16) __U);
9127}
9128
9129extern __inline __mmask8
9130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9131_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9132 const int __P, const int __R)
9133{
9134 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9135 (__v8df) __Y, __P,
9136 (__mmask8) __U, __R);
9137}
9138
9139extern __inline __mmask16
9140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9141_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9142 const int __P, const int __R)
9143{
9144 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9145 (__v16sf) __Y, __P,
9146 (__mmask16) __U, __R);
9147}
9148
9149extern __inline __mmask8
9150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9152{
9153 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9154 (__v2df) __Y, __P,
9155 (__mmask8) -1, __R);
9156}
9157
9158extern __inline __mmask8
9159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9160_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9161 const int __P, const int __R)
9162{
9163 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9164 (__v2df) __Y, __P,
9165 (__mmask8) __M, __R);
9166}
9167
9168extern __inline __mmask8
9169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9170_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9171{
9172 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9173 (__v4sf) __Y, __P,
9174 (__mmask8) -1, __R);
9175}
9176
9177extern __inline __mmask8
9178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9180 const int __P, const int __R)
9181{
9182 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9183 (__v4sf) __Y, __P,
9184 (__mmask8) __M, __R);
9185}
9186
9187#else
d8ea3e7c
AS
/* Macro forms of the 16-bit mask shifts: K is cast to __mmask16 and
   the count N to __mmask8 before reaching the kshift builtins.  */
#define _kshiftli_mask16(K, N)                                          \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(K),               \
                                          (__mmask8)(N)))

#define _kshiftri_mask16(K, N)                                          \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(K),               \
                                          (__mmask8)(N)))
9193
756c5857
AI
/* Macro forms of the unmasked 512-bit compare intrinsics.  A and B are
   the vector operands, PRED the predicate immediate; every expansion
   supplies an all-ones mask.  The "_round" variants additionally pass
   R through to the builtin.  */
#define _mm512_cmp_epi64_mask(A, B, PRED)                               \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(A),        \
                                           (__v8di)(__m512i)(B),        \
                                           (int)(PRED),                 \
                                           (__mmask8)-1))

#define _mm512_cmp_epi32_mask(A, B, PRED)                               \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(A),      \
                                            (__v16si)(__m512i)(B),      \
                                            (int)(PRED),                \
                                            (__mmask16)-1))

#define _mm512_cmp_epu64_mask(A, B, PRED)                               \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(A),       \
                                            (__v8di)(__m512i)(B),       \
                                            (int)(PRED),                \
                                            (__mmask8)-1))

#define _mm512_cmp_epu32_mask(A, B, PRED)                               \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(A),     \
                                             (__v16si)(__m512i)(B),     \
                                             (int)(PRED),               \
                                             (__mmask16)-1))

#define _mm512_cmp_round_pd_mask(A, B, PRED, R)                         \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(A),       \
                                            (__v8df)(__m512d)(B),       \
                                            (int)(PRED),                \
                                            (__mmask8)-1, R))

#define _mm512_cmp_round_ps_mask(A, B, PRED, R)                         \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)(__m512)(B),      \
                                             (int)(PRED),               \
                                             (__mmask16)-1, R))
9223
/* Macro forms of the masked 512-bit compare intrinsics.  M is the mask
   operand, X and Y the vector operands, P the predicate immediate; the
   "_round" variants additionally pass R through to the builtin.

   M is parenthesized inside the cast.  Writing `(__mmask8)M' would
   apply the cast to only the first operand of an argument such as
   `m >> 1' or `a | b', narrowing before the operator instead of
   after it.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))

#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))

#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))

#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))

#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), R))

#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), R))
9253
/* Macro forms of the scalar compare intrinsics.  A and B are the
   vector operands, PRED the predicate immediate, R is passed through
   unchanged.  The unmasked variants supply an all-ones mask; the
   masked variants pass K as written (parenthesized, no cast).  */
#define _mm_cmp_round_sd_mask(A, B, PRED, R)                            \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(A),          \
                                         (__v2df)(__m128d)(B),          \
                                         (int)(PRED),                   \
                                         (__mmask8)-1, R))

#define _mm_mask_cmp_round_sd_mask(K, A, B, PRED, R)                    \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(A),          \
                                         (__v2df)(__m128d)(B),          \
                                         (int)(PRED),                   \
                                         (K), R))

#define _mm_cmp_round_ss_mask(A, B, PRED, R)                            \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(A),           \
                                         (__v4sf)(__m128)(B),           \
                                         (int)(PRED),                   \
                                         (__mmask8)-1, R))

#define _mm_mask_cmp_round_ss_mask(K, A, B, PRED, R)                    \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(A),           \
                                         (__v4sf)(__m128)(B),           \
                                         (int)(PRED),                   \
                                         (K), R))
9273#endif
9274
9275#ifdef __OPTIMIZE__
9276extern __inline __m512
9277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9278_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9279{
b5fd0b71
JJ
9280 __m512 __v1_old = _mm512_undefined_ps ();
9281 __mmask16 __mask = 0xFFFF;
756c5857 9282
b5fd0b71 9283 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9284 __addr,
9285 (__v16si) __index,
b5fd0b71 9286 __mask, __scale);
756c5857
AI
9287}
9288
9289extern __inline __m512
9290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 9291_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 9292 __m512i __index, void const *__addr, int __scale)
756c5857 9293{
b5fd0b71 9294 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9295 __addr,
9296 (__v16si) __index,
9297 __mask, __scale);
9298}
9299
9300extern __inline __m512d
9301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9302_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 9303{
b5fd0b71
JJ
9304 __m512d __v1_old = _mm512_undefined_pd ();
9305 __mmask8 __mask = 0xFF;
756c5857 9306
b5fd0b71 9307 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 9308 __addr,
b5fd0b71 9309 (__v8si) __index, __mask,
756c5857
AI
9310 __scale);
9311}
9312
9313extern __inline __m512d
9314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 9316 __m256i __index, void const *__addr, int __scale)
756c5857
AI
9317{
9318 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9319 __addr,
9320 (__v8si) __index,
9321 __mask, __scale);
9322}
9323
9324extern __inline __m256
9325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9326_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9327{
b5fd0b71
JJ
9328 __m256 __v1_old = _mm256_undefined_ps ();
9329 __mmask8 __mask = 0xFF;
756c5857 9330
b5fd0b71 9331 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 9332 __addr,
b5fd0b71 9333 (__v8di) __index, __mask,
756c5857
AI
9334 __scale);
9335}
9336
9337extern __inline __m256
9338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 9340 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9341{
9342 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9343 __addr,
9344 (__v8di) __index,
9345 __mask, __scale);
9346}
9347
9348extern __inline __m512d
9349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9350_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 9351{
b5fd0b71
JJ
9352 __m512d __v1_old = _mm512_undefined_pd ();
9353 __mmask8 __mask = 0xFF;
756c5857 9354
b5fd0b71 9355 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 9356 __addr,
b5fd0b71 9357 (__v8di) __index, __mask,
756c5857
AI
9358 __scale);
9359}
9360
9361extern __inline __m512d
9362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 9364 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9365{
9366 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9367 __addr,
9368 (__v8di) __index,
9369 __mask, __scale);
9370}
9371
9372extern __inline __m512i
9373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9374_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 9375{
b5fd0b71
JJ
9376 __m512i __v1_old = _mm512_undefined_epi32 ();
9377 __mmask16 __mask = 0xFFFF;
756c5857 9378
b5fd0b71 9379 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
9380 __addr,
9381 (__v16si) __index,
b5fd0b71 9382 __mask, __scale);
756c5857
AI
9383}
9384
9385extern __inline __m512i
9386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9387_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 9388 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9389{
9390 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9391 __addr,
9392 (__v16si) __index,
9393 __mask, __scale);
9394}
9395
9396extern __inline __m512i
9397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9398_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 9399{
b5fd0b71
JJ
9400 __m512i __v1_old = _mm512_undefined_epi32 ();
9401 __mmask8 __mask = 0xFF;
756c5857 9402
b5fd0b71 9403 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 9404 __addr,
b5fd0b71 9405 (__v8si) __index, __mask,
756c5857
AI
9406 __scale);
9407}
9408
9409extern __inline __m512i
9410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9411_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 9412 __m256i __index, void const *__addr,
756c5857
AI
9413 int __scale)
9414{
9415 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9416 __addr,
9417 (__v8si) __index,
9418 __mask, __scale);
9419}
9420
9421extern __inline __m256i
9422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9423_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 9424{
b5fd0b71
JJ
9425 __m256i __v1_old = _mm256_undefined_si256 ();
9426 __mmask8 __mask = 0xFF;
756c5857 9427
b5fd0b71 9428 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
9429 __addr,
9430 (__v8di) __index,
b5fd0b71 9431 __mask, __scale);
756c5857
AI
9432}
9433
9434extern __inline __m256i
9435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 9437 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9438{
9439 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9440 __addr,
9441 (__v8di) __index,
9442 __mask, __scale);
9443}
9444
9445extern __inline __m512i
9446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9447_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 9448{
b5fd0b71
JJ
9449 __m512i __v1_old = _mm512_undefined_epi32 ();
9450 __mmask8 __mask = 0xFF;
756c5857 9451
b5fd0b71 9452 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 9453 __addr,
b5fd0b71 9454 (__v8di) __index, __mask,
756c5857
AI
9455 __scale);
9456}
9457
9458extern __inline __m512i
9459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9460_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 9461 __m512i __index, void const *__addr,
756c5857
AI
9462 int __scale)
9463{
9464 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9465 __addr,
9466 (__v8di) __index,
9467 __mask, __scale);
9468}
9469
9470extern __inline void
9471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9472_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
9473{
9474 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9475 (__v16si) __index, (__v16sf) __v1, __scale);
9476}
9477
9478extern __inline void
9479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9480_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
9481 __m512i __index, __m512 __v1, int __scale)
9482{
9483 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9484 (__v16sf) __v1, __scale);
9485}
9486
9487extern __inline void
9488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9489_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
9490 int __scale)
9491{
9492 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9493 (__v8si) __index, (__v8df) __v1, __scale);
9494}
9495
9496extern __inline void
9497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9498_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
9499 __m256i __index, __m512d __v1, int __scale)
9500{
9501 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9502 (__v8df) __v1, __scale);
9503}
9504
9505extern __inline void
9506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9507_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
9508{
9509 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9510 (__v8di) __index, (__v8sf) __v1, __scale);
9511}
9512
9513extern __inline void
9514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9515_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
9516 __m512i __index, __m256 __v1, int __scale)
9517{
9518 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9519 (__v8sf) __v1, __scale);
9520}
9521
9522extern __inline void
9523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9524_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
9525 int __scale)
9526{
9527 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9528 (__v8di) __index, (__v8df) __v1, __scale);
9529}
9530
9531extern __inline void
9532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9533_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
9534 __m512i __index, __m512d __v1, int __scale)
9535{
9536 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9537 (__v8df) __v1, __scale);
9538}
9539
9540extern __inline void
9541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9542_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
9543 __m512i __v1, int __scale)
9544{
9545 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9546 (__v16si) __index, (__v16si) __v1, __scale);
9547}
9548
9549extern __inline void
9550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9551_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
9552 __m512i __index, __m512i __v1, int __scale)
9553{
9554 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9555 (__v16si) __v1, __scale);
9556}
9557
9558extern __inline void
9559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9560_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
9561 __m512i __v1, int __scale)
9562{
9563 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9564 (__v8si) __index, (__v8di) __v1, __scale);
9565}
9566
9567extern __inline void
9568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9569_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
9570 __m256i __index, __m512i __v1, int __scale)
9571{
9572 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9573 (__v8di) __v1, __scale);
9574}
9575
9576extern __inline void
9577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9578_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
9579 __m256i __v1, int __scale)
9580{
9581 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9582 (__v8di) __index, (__v8si) __v1, __scale);
9583}
9584
9585extern __inline void
9586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9587_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
9588 __m512i __index, __m256i __v1, int __scale)
9589{
9590 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9591 (__v8si) __v1, __scale);
9592}
9593
9594extern __inline void
9595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9596_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
9597 __m512i __v1, int __scale)
9598{
9599 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9600 (__v8di) __index, (__v8di) __v1, __scale);
9601}
9602
9603extern __inline void
9604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9605_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
9606 __m512i __index, __m512i __v1, int __scale)
9607{
9608 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9609 (__v8di) __v1, __scale);
9610}
9611#else
/* Macro form of _mm512_i32gather_ps.  Every argument is parenthesized
   before it is cast, and the whole expansion is parenthesized, so
   compound argument expressions keep their intended precedence.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16)0xFFFF, (int) (SCALE)))
9617
/* Macro form of _mm512_mask_i32gather_ps.  Every argument is
   parenthesized before it is cast, and the whole expansion is
   parenthesized, so compound argument expressions expand correctly.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))
9623
/* Macro form of _mm512_i32gather_pd.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9629
/* Macro form of _mm512_mask_i32gather_pd.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9635
/* Macro form of _mm512_i64gather_ps.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9641
/* Macro form of _mm512_mask_i64gather_ps.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9647
/* Macro form of _mm512_i64gather_pd.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9653
/* Macro form of _mm512_mask_i64gather_pd.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9659
/* Macro form of _mm512_i32gather_epi32.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16)0xFFFF, (int) (SCALE)))
9665
/* Macro form of _mm512_mask_i32gather_epi32.  Arguments and the
   overall expansion are parenthesized to keep operator precedence
   intact.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), (int) (SCALE)))
9671
/* Macro form of _mm512_i32gather_epi64.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9677
/* Macro form of _mm512_mask_i32gather_epi64.  Arguments and the
   overall expansion are parenthesized to keep operator precedence
   intact.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9683
/* Macro form of _mm512_i64gather_epi32.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8)0xFF, (int) (SCALE)))
9689
/* Macro form of _mm512_mask_i64gather_epi32.  Arguments and the
   overall expansion are parenthesized to keep operator precedence
   intact.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), (int) (SCALE)))
9695
/* Macro form of _mm512_i64gather_epi64.  Arguments and the overall
   expansion are parenthesized to keep operator precedence intact.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8)0xFF, (int) (SCALE)))
9701
/* Macro form of _mm512_mask_i64gather_epi64.  Arguments and the
   overall expansion are parenthesized to keep operator precedence
   intact.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9707
/* Macro form of _mm512_i32scatter_ps.  Each argument is parenthesized
   before it is cast so compound expressions expand correctly.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9712
/* Macro form of _mm512_mask_i32scatter_ps.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
9717
/* Macro form of _mm512_i32scatter_pd.  Each argument is parenthesized
   before it is cast so compound expressions expand correctly.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9722
/* Macro form of _mm512_mask_i32scatter_pd.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9727
/* Macro form of _mm512_i64scatter_ps.  Each argument is parenthesized
   before it is cast so compound expressions expand correctly.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9732
/* Macro form of _mm512_mask_i64scatter_ps.  The mask is cast to
   __mmask8, matching the inline-function signature and the unmasked
   macro (which passes (__mmask8)0xFF to the same builtin).  Each
   argument is parenthesized before it is cast.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
9737
/* Macro form of _mm512_i64scatter_pd.  Each argument is parenthesized
   before it is cast so compound expressions expand correctly.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9742
/* Macro form of _mm512_mask_i64scatter_pd.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9747
/* Macro form of _mm512_i32scatter_epi32.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9752
/* Macro form of _mm512_mask_i32scatter_epi32.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9757
/* Macro form of _mm512_i32scatter_epi64.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9762
/* Macro form of _mm512_mask_i32scatter_epi64.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9767
/* Macro form of _mm512_i64scatter_epi32.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9772
/* Macro form of _mm512_mask_i64scatter_epi32.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9777
/* Macro form of _mm512_i64scatter_epi64.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9782
/* Macro form of _mm512_mask_i64scatter_epi64.  Each argument is
   parenthesized before it is cast so compound expressions expand
   correctly.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9787#endif
9788
9789extern __inline __m512d
9790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9791_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9792{
9793 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9794 (__v8df) __W,
9795 (__mmask8) __U);
9796}
9797
9798extern __inline __m512d
9799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9800_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9801{
9802 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9803 (__v8df)
9804 _mm512_setzero_pd (),
9805 (__mmask8) __U);
9806}
9807
9808extern __inline void
9809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9811{
9812 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9813 (__mmask8) __U);
9814}
9815
9816extern __inline __m512
9817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9818_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9819{
9820 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9821 (__v16sf) __W,
9822 (__mmask16) __U);
9823}
9824
9825extern __inline __m512
9826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9827_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9828{
9829 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9830 (__v16sf)
9831 _mm512_setzero_ps (),
9832 (__mmask16) __U);
9833}
9834
9835extern __inline void
9836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9837_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9838{
9839 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9840 (__mmask16) __U);
9841}
9842
9843extern __inline __m512i
9844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9846{
9847 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9848 (__v8di) __W,
9849 (__mmask8) __U);
9850}
9851
9852extern __inline __m512i
9853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9854_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9855{
9856 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9857 (__v8di)
9858 _mm512_setzero_si512 (),
9859 (__mmask8) __U);
9860}
9861
9862extern __inline void
9863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9864_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9865{
9866 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9867 (__mmask8) __U);
9868}
9869
9870extern __inline __m512i
9871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9872_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9873{
9874 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9875 (__v16si) __W,
9876 (__mmask16) __U);
9877}
9878
9879extern __inline __m512i
9880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9882{
9883 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9884 (__v16si)
9885 _mm512_setzero_si512 (),
9886 (__mmask16) __U);
9887}
9888
9889extern __inline void
9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9892{
9893 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9894 (__mmask16) __U);
9895}
9896
9897extern __inline __m512d
9898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9899_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9900{
9901 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9902 (__v8df) __W,
9903 (__mmask8) __U);
9904}
9905
9906extern __inline __m512d
9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9909{
9910 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9911 (__v8df)
9912 _mm512_setzero_pd (),
9913 (__mmask8) __U);
9914}
9915
9916extern __inline __m512d
9917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9919{
9920 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9921 (__v8df) __W,
9922 (__mmask8) __U);
9923}
9924
9925extern __inline __m512d
9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9928{
9929 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9930 (__v8df)
9931 _mm512_setzero_pd (),
9932 (__mmask8) __U);
9933}
9934
9935extern __inline __m512
9936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9938{
9939 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9940 (__v16sf) __W,
9941 (__mmask16) __U);
9942}
9943
9944extern __inline __m512
9945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9946_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9947{
9948 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9949 (__v16sf)
9950 _mm512_setzero_ps (),
9951 (__mmask16) __U);
9952}
9953
9954extern __inline __m512
9955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9957{
9958 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9959 (__v16sf) __W,
9960 (__mmask16) __U);
9961}
9962
9963extern __inline __m512
9964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9965_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9966{
9967 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9968 (__v16sf)
9969 _mm512_setzero_ps (),
9970 (__mmask16) __U);
9971}
9972
9973extern __inline __m512i
9974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9976{
9977 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9978 (__v8di) __W,
9979 (__mmask8) __U);
9980}
9981
9982extern __inline __m512i
9983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9984_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9985{
9986 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9987 (__v8di)
9988 _mm512_setzero_si512 (),
9989 (__mmask8) __U);
9990}
9991
9992extern __inline __m512i
9993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9994_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9995{
9996 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9997 (__v8di) __W,
9998 (__mmask8) __U);
9999}
10000
10001extern __inline __m512i
10002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10003_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10004{
10005 return (__m512i)
10006 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10007 (__v8di)
10008 _mm512_setzero_si512 (),
10009 (__mmask8) __U);
10010}
10011
10012extern __inline __m512i
10013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10014_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10015{
10016 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10017 (__v16si) __W,
10018 (__mmask16) __U);
10019}
10020
10021extern __inline __m512i
10022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10024{
10025 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10026 (__v16si)
10027 _mm512_setzero_si512 (),
10028 (__mmask16) __U);
10029}
10030
10031extern __inline __m512i
10032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10033_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10034{
10035 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10036 (__v16si) __W,
10037 (__mmask16) __U);
10038}
10039
10040extern __inline __m512i
10041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10042_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10043{
10044 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10045 (__v16si)
10046 _mm512_setzero_si512
10047 (), (__mmask16) __U);
10048}
10049
10050/* Mask arithmetic operations */
6901ea62
AS
/* _k*_mask16 spellings, defined as plain aliases of the corresponding
   _mm512_k* mask operations.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
10057
dea06111
AS
10058extern __inline unsigned char
10059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10060_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10061{
10062 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10063 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10064}
10065
10066extern __inline unsigned char
10067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10069{
10070 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10071 (__mmask16) __B);
10072}
10073
10074extern __inline unsigned char
10075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10076_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10077{
10078 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10079 (__mmask16) __B);
10080}
10081
7cdb6e4c
AS
10082extern __inline unsigned int
10083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084_cvtmask16_u32 (__mmask16 __A)
10085{
10086 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10087}
10088
10089extern __inline __mmask16
10090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10091_cvtu32_mask16 (unsigned int __A)
10092{
10093 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10094}
10095
10096extern __inline __mmask16
10097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098_load_mask16 (__mmask16 *__A)
10099{
10100 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10101}
10102
10103extern __inline void
10104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105_store_mask16 (__mmask16 *__A, __mmask16 __B)
10106{
10107 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10108}
10109
756c5857
AI
10110extern __inline __mmask16
10111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10112_mm512_kand (__mmask16 __A, __mmask16 __B)
10113{
10114 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10115}
10116
10117extern __inline __mmask16
10118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10119_mm512_kandn (__mmask16 __A, __mmask16 __B)
10120{
6901ea62
AS
10121 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10122 (__mmask16) __B);
756c5857
AI
10123}
10124
10125extern __inline __mmask16
10126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10127_mm512_kor (__mmask16 __A, __mmask16 __B)
10128{
10129 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10130}
10131
10132extern __inline int
10133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10135{
10136 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10137 (__mmask16) __B);
10138}
10139
10140extern __inline int
10141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10143{
10144 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10145 (__mmask16) __B);
10146}
10147
10148extern __inline __mmask16
10149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10150_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10151{
10152 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10153}
10154
10155extern __inline __mmask16
10156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10157_mm512_kxor (__mmask16 __A, __mmask16 __B)
10158{
10159 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10160}
10161
10162extern __inline __mmask16
10163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164_mm512_knot (__mmask16 __A)
10165{
10166 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10167}
10168
10169extern __inline __mmask16
10170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10172{
10173 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10174}
10175
6901ea62
AS
10176extern __inline __mmask16
10177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10179{
10180 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10181}
10182
756c5857
AI
10183#ifdef __OPTIMIZE__
10184extern __inline __m512i
10185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10187 const int __imm)
10188{
10189 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10190 (__v4si) __D,
10191 __imm,
10192 (__v16si)
10193 _mm512_setzero_si512 (),
10194 __B);
10195}
10196
10197extern __inline __m512
10198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10199_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10200 const int __imm)
10201{
10202 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10203 (__v4sf) __D,
10204 __imm,
10205 (__v16sf)
10206 _mm512_setzero_ps (), __B);
10207}
10208
10209extern __inline __m512i
10210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10211_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10212 __m128i __D, const int __imm)
10213{
10214 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10215 (__v4si) __D,
10216 __imm,
10217 (__v16si) __A,
10218 __B);
10219}
10220
10221extern __inline __m512
10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10224 __m128 __D, const int __imm)
10225{
10226 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10227 (__v4sf) __D,
10228 __imm,
10229 (__v16sf) __A, __B);
10230}
10231#else
/* Macro form of _mm512_maskz_insertf32x4.  The mask is cast to
   __mmask16, matching the inline-function signature; a __mmask8 cast
   would discard mask bits 8..15 of this 16-lane operation.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))
10236
/* Macro form of _mm512_maskz_inserti32x4.  The mask is cast to
   __mmask16, matching the inline-function signature; a __mmask8 cast
   would discard mask bits 8..15 of this 16-lane operation.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10241
/* Macro form of _mm512_mask_insertf32x4.  The mask is cast to
   __mmask16, matching the inline-function signature; a __mmask8 cast
   would discard mask bits 8..15 of this 16-lane operation.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))
10246
/* Macro form of _mm512_mask_inserti32x4.  The mask is cast to
   __mmask16, matching the inline-function signature; a __mmask8 cast
   would discard mask bits 8..15 of this 16-lane operation.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10251#endif
10252
10253extern __inline __m512i
10254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10255_mm512_max_epi64 (__m512i __A, __m512i __B)
10256{
10257 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10258 (__v8di) __B,
10259 (__v8di)
4271e5cb 10260 _mm512_undefined_epi32 (),
756c5857
AI
10261 (__mmask8) -1);
10262}
10263
10264extern __inline __m512i
10265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10267{
10268 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10269 (__v8di) __B,
10270 (__v8di)
10271 _mm512_setzero_si512 (),
10272 __M);
10273}
10274
10275extern __inline __m512i
10276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10278{
10279 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10280 (__v8di) __B,
10281 (__v8di) __W, __M);
10282}
10283
10284extern __inline __m512i
10285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10286_mm512_min_epi64 (__m512i __A, __m512i __B)
10287{
10288 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10289 (__v8di) __B,
10290 (__v8di)
4271e5cb 10291 _mm512_undefined_epi32 (),
756c5857
AI
10292 (__mmask8) -1);
10293}
10294
10295extern __inline __m512i
10296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10297_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10298{
10299 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10300 (__v8di) __B,
10301 (__v8di) __W, __M);
10302}
10303
10304extern __inline __m512i
10305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10306_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10307{
10308 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10309 (__v8di) __B,
10310 (__v8di)
10311 _mm512_setzero_si512 (),
10312 __M);
10313}
10314
10315extern __inline __m512i
10316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317_mm512_max_epu64 (__m512i __A, __m512i __B)
10318{
10319 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10320 (__v8di) __B,
10321 (__v8di)
4271e5cb 10322 _mm512_undefined_epi32 (),
756c5857
AI
10323 (__mmask8) -1);
10324}
10325
10326extern __inline __m512i
10327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10329{
10330 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10331 (__v8di) __B,
10332 (__v8di)
10333 _mm512_setzero_si512 (),
10334 __M);
10335}
10336
10337extern __inline __m512i
10338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10340{
10341 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10342 (__v8di) __B,
10343 (__v8di) __W, __M);
10344}
10345
10346extern __inline __m512i
10347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10348_mm512_min_epu64 (__m512i __A, __m512i __B)
10349{
10350 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10351 (__v8di) __B,
10352 (__v8di)
4271e5cb 10353 _mm512_undefined_epi32 (),
756c5857
AI
10354 (__mmask8) -1);
10355}
10356
10357extern __inline __m512i
10358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10359_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10360{
10361 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10362 (__v8di) __B,
10363 (__v8di) __W, __M);
10364}
10365
10366extern __inline __m512i
10367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10369{
10370 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10371 (__v8di) __B,
10372 (__v8di)
10373 _mm512_setzero_si512 (),
10374 __M);
10375}
10376
10377extern __inline __m512i
10378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10379_mm512_max_epi32 (__m512i __A, __m512i __B)
10380{
10381 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10382 (__v16si) __B,
10383 (__v16si)
4271e5cb 10384 _mm512_undefined_epi32 (),
756c5857
AI
10385 (__mmask16) -1);
10386}
10387
10388extern __inline __m512i
10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10391{
10392 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10393 (__v16si) __B,
10394 (__v16si)
10395 _mm512_setzero_si512 (),
10396 __M);
10397}
10398
10399extern __inline __m512i
10400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10402{
10403 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10404 (__v16si) __B,
10405 (__v16si) __W, __M);
10406}
10407
10408extern __inline __m512i
10409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410_mm512_min_epi32 (__m512i __A, __m512i __B)
10411{
10412 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10413 (__v16si) __B,
10414 (__v16si)
4271e5cb 10415 _mm512_undefined_epi32 (),
756c5857
AI
10416 (__mmask16) -1);
10417}
10418
10419extern __inline __m512i
10420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10422{
10423 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10424 (__v16si) __B,
10425 (__v16si)
10426 _mm512_setzero_si512 (),
10427 __M);
10428}
10429
10430extern __inline __m512i
10431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10432_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10433{
10434 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10435 (__v16si) __B,
10436 (__v16si) __W, __M);
10437}
10438
10439extern __inline __m512i
10440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441_mm512_max_epu32 (__m512i __A, __m512i __B)
10442{
10443 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10444 (__v16si) __B,
10445 (__v16si)
4271e5cb 10446 _mm512_undefined_epi32 (),
756c5857
AI
10447 (__mmask16) -1);
10448}
10449
10450extern __inline __m512i
10451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10453{
10454 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10455 (__v16si) __B,
10456 (__v16si)
10457 _mm512_setzero_si512 (),
10458 __M);
10459}
10460
10461extern __inline __m512i
10462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10463_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10464{
10465 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10466 (__v16si) __B,
10467 (__v16si) __W, __M);
10468}
10469
10470extern __inline __m512i
10471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10472_mm512_min_epu32 (__m512i __A, __m512i __B)
10473{
10474 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10475 (__v16si) __B,
10476 (__v16si)
4271e5cb 10477 _mm512_undefined_epi32 (),
756c5857
AI
10478 (__mmask16) -1);
10479}
10480
10481extern __inline __m512i
10482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10483_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10484{
10485 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10486 (__v16si) __B,
10487 (__v16si)
10488 _mm512_setzero_si512 (),
10489 __M);
10490}
10491
10492extern __inline __m512i
10493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10494_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10495{
10496 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10497 (__v16si) __B,
10498 (__v16si) __W, __M);
10499}
10500
10501extern __inline __m512
10502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10503_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10504{
10505 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10506 (__v16sf) __B,
10507 (__v16sf)
0b192937 10508 _mm512_undefined_ps (),
756c5857
AI
10509 (__mmask16) -1);
10510}
10511
10512extern __inline __m512
10513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10514_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10515{
10516 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10517 (__v16sf) __B,
10518 (__v16sf) __W,
10519 (__mmask16) __U);
10520}
10521
10522extern __inline __m512
10523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10524_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10525{
10526 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10527 (__v16sf) __B,
10528 (__v16sf)
10529 _mm512_setzero_ps (),
10530 (__mmask16) __U);
10531}
10532
075691af
AI
10533#ifdef __OPTIMIZE__
10534extern __inline __m128d
10535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10536_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10537{
10538 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10539 (__v2df) __B,
10540 __R);
10541}
10542
10543extern __inline __m128
10544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10545_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10546{
10547 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10548 (__v4sf) __B,
10549 __R);
10550}
10551
10552extern __inline __m128d
10553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10554_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10555{
10556 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10557 (__v2df) __B,
10558 __R);
10559}
10560
10561extern __inline __m128
10562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10563_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10564{
10565 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10566 (__v4sf) __B,
10567 __R);
10568}
10569
10570#else
/* Macro form of _mm_max_round_sd used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_maxsd_round, the builtin the inline
   definition calls, so both forms agree.  The expansion is fully
   parenthesized so the cast covers the whole call.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round ((A), (B), (C)))
10573
/* Macro form of _mm_max_round_ss used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_maxss_round, the builtin the inline
   definition calls, so both forms agree.  The expansion is fully
   parenthesized so the cast covers the whole call.  */
#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round ((A), (B), (C)))
10576
/* Macro form of _mm_min_round_sd used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_minsd_round, the builtin the inline
   definition calls, so both forms agree.  The expansion is fully
   parenthesized so the cast covers the whole call.  */
#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round ((A), (B), (C)))
10579
/* Macro form of _mm_min_round_ss used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_minss_round, the builtin the inline
   definition calls, so both forms agree.  The expansion is fully
   parenthesized so the cast covers the whole call.  */
#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round ((A), (B), (C)))
10582#endif
10583
756c5857
AI
10584extern __inline __m512d
10585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10586_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10587{
10588 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10589 (__v8df) __W,
10590 (__mmask8) __U);
10591}
10592
10593extern __inline __m512
10594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10596{
10597 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10598 (__v16sf) __W,
10599 (__mmask16) __U);
10600}
10601
10602extern __inline __m512i
10603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10604_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10605{
10606 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10607 (__v8di) __W,
10608 (__mmask8) __U);
10609}
10610
10611extern __inline __m512i
10612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10613_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10614{
10615 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10616 (__v16si) __W,
10617 (__mmask16) __U);
10618}
10619
075691af
AI
10620#ifdef __OPTIMIZE__
10621extern __inline __m128d
10622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10623_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10624{
10625 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10626 (__v2df) __A,
10627 (__v2df) __B,
10628 __R);
10629}
10630
10631extern __inline __m128
10632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10633_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10634{
10635 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10636 (__v4sf) __A,
10637 (__v4sf) __B,
10638 __R);
10639}
10640
10641extern __inline __m128d
10642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10643_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10644{
10645 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10646 (__v2df) __A,
10647 -(__v2df) __B,
10648 __R);
10649}
10650
10651extern __inline __m128
10652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10653_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10654{
10655 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10656 (__v4sf) __A,
10657 -(__v4sf) __B,
10658 __R);
10659}
10660
10661extern __inline __m128d
10662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10663_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10664{
10665 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10666 -(__v2df) __A,
10667 (__v2df) __B,
10668 __R);
10669}
10670
10671extern __inline __m128
10672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10673_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10674{
10675 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10676 -(__v4sf) __A,
10677 (__v4sf) __B,
10678 __R);
10679}
10680
10681extern __inline __m128d
10682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10683_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10684{
10685 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10686 -(__v2df) __A,
10687 -(__v2df) __B,
10688 __R);
10689}
10690
10691extern __inline __m128
10692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10694{
10695 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10696 -(__v4sf) __A,
10697 -(__v4sf) __B,
10698 __R);
10699}
10700#else
/* Macro form of _mm_fmadd_round_sd used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so the cast still applies to the
   complete builtin call when a postfix operator follows the macro.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))
10703
/* Macro form of _mm_fmadd_round_ss used when __OPTIMIZE__ is not defined.
   The whole expansion is parenthesized so the cast still applies to the
   complete builtin call when a postfix operator follows the macro.  */
#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))
10706
/* Macro form of _mm_fmsub_round_sd used when __OPTIMIZE__ is not defined;
   C is negated before the call.  The whole expansion is parenthesized so
   the cast still applies to the complete builtin call when a postfix
   operator follows the macro.  */
#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))
10709
/* Macro form of _mm_fmsub_round_ss used when __OPTIMIZE__ is not defined;
   C is negated before the call.  The whole expansion is parenthesized so
   the cast still applies to the complete builtin call when a postfix
   operator follows the macro.  */
#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))
10712
/* Macro form of _mm_fnmadd_round_sd used when __OPTIMIZE__ is not
   defined; B is negated before the call.  The whole expansion is
   parenthesized so the cast still applies to the complete builtin call
   when a postfix operator follows the macro.  */
#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))
10715
/* Macro form of _mm_fnmadd_round_ss used when __OPTIMIZE__ is not
   defined; B is negated before the call.  The whole expansion is
   parenthesized so the cast still applies to the complete builtin call
   when a postfix operator follows the macro.  */
#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))
10718
/* Macro form of _mm_fnmsub_round_sd used when __OPTIMIZE__ is not
   defined; B and C are both negated before the call.  The whole
   expansion is parenthesized so the cast still applies to the complete
   builtin call when a postfix operator follows the macro.  */
#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))
10721
/* Macro form of _mm_fnmsub_round_ss used when __OPTIMIZE__ is not
   defined; B and C are both negated before the call.  The whole
   expansion is parenthesized so the cast still applies to the complete
   builtin call when a postfix operator follows the macro.  */
#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10724#endif
10725
756c5857
AI
10726#ifdef __OPTIMIZE__
10727extern __inline int
10728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10730{
10731 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10732}
10733
10734extern __inline int
10735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10737{
10738 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10739}
10740#else
/* Macro form of _mm_comi_round_ss used when __OPTIMIZE__ is not defined;
   forwards all four arguments to __builtin_ia32_vcomiss.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss (A, B, C, D))
/* Macro form of _mm_comi_round_sd used when __OPTIMIZE__ is not defined;
   forwards all four arguments to __builtin_ia32_vcomisd.  */
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd (A, B, C, D))
10745#endif
10746
10747extern __inline __m512d
10748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10749_mm512_sqrt_pd (__m512d __A)
10750{
10751 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10752 (__v8df)
0b192937 10753 _mm512_undefined_pd (),
756c5857
AI
10754 (__mmask8) -1,
10755 _MM_FROUND_CUR_DIRECTION);
10756}
10757
10758extern __inline __m512d
10759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10761{
10762 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10763 (__v8df) __W,
10764 (__mmask8) __U,
10765 _MM_FROUND_CUR_DIRECTION);
10766}
10767
10768extern __inline __m512d
10769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10770_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10771{
10772 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10773 (__v8df)
10774 _mm512_setzero_pd (),
10775 (__mmask8) __U,
10776 _MM_FROUND_CUR_DIRECTION);
10777}
10778
10779extern __inline __m512
10780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10781_mm512_sqrt_ps (__m512 __A)
10782{
10783 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10784 (__v16sf)
0b192937 10785 _mm512_undefined_ps (),
756c5857
AI
10786 (__mmask16) -1,
10787 _MM_FROUND_CUR_DIRECTION);
10788}
10789
10790extern __inline __m512
10791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10792_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10793{
10794 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10795 (__v16sf) __W,
10796 (__mmask16) __U,
10797 _MM_FROUND_CUR_DIRECTION);
10798}
10799
10800extern __inline __m512
10801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10802_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10803{
10804 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10805 (__v16sf)
10806 _mm512_setzero_ps (),
10807 (__mmask16) __U,
10808 _MM_FROUND_CUR_DIRECTION);
10809}
10810
10811extern __inline __m512d
10812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813_mm512_add_pd (__m512d __A, __m512d __B)
10814{
2069d6fc 10815 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
10816}
10817
10818extern __inline __m512d
10819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10820_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10821{
10822 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10823 (__v8df) __B,
10824 (__v8df) __W,
10825 (__mmask8) __U,
10826 _MM_FROUND_CUR_DIRECTION);
10827}
10828
10829extern __inline __m512d
10830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10831_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10832{
10833 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10834 (__v8df) __B,
10835 (__v8df)
10836 _mm512_setzero_pd (),
10837 (__mmask8) __U,
10838 _MM_FROUND_CUR_DIRECTION);
10839}
10840
10841extern __inline __m512
10842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843_mm512_add_ps (__m512 __A, __m512 __B)
10844{
2069d6fc 10845 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
10846}
10847
10848extern __inline __m512
10849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10850_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10851{
10852 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10853 (__v16sf) __B,
10854 (__v16sf) __W,
10855 (__mmask16) __U,
10856 _MM_FROUND_CUR_DIRECTION);
10857}
10858
10859extern __inline __m512
10860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10861_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10862{
10863 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10864 (__v16sf) __B,
10865 (__v16sf)
10866 _mm512_setzero_ps (),
10867 (__mmask16) __U,
10868 _MM_FROUND_CUR_DIRECTION);
10869}
10870
10871extern __inline __m512d
10872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873_mm512_sub_pd (__m512d __A, __m512d __B)
10874{
2069d6fc 10875 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
10876}
10877
10878extern __inline __m512d
10879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10880_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10881{
10882 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10883 (__v8df) __B,
10884 (__v8df) __W,
10885 (__mmask8) __U,
10886 _MM_FROUND_CUR_DIRECTION);
10887}
10888
10889extern __inline __m512d
10890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10892{
10893 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10894 (__v8df) __B,
10895 (__v8df)
10896 _mm512_setzero_pd (),
10897 (__mmask8) __U,
10898 _MM_FROUND_CUR_DIRECTION);
10899}
10900
10901extern __inline __m512
10902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903_mm512_sub_ps (__m512 __A, __m512 __B)
10904{
2069d6fc 10905 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
10906}
10907
10908extern __inline __m512
10909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10910_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10911{
10912 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10913 (__v16sf) __B,
10914 (__v16sf) __W,
10915 (__mmask16) __U,
10916 _MM_FROUND_CUR_DIRECTION);
10917}
10918
10919extern __inline __m512
10920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10921_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10922{
10923 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10924 (__v16sf) __B,
10925 (__v16sf)
10926 _mm512_setzero_ps (),
10927 (__mmask16) __U,
10928 _MM_FROUND_CUR_DIRECTION);
10929}
10930
10931extern __inline __m512d
10932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933_mm512_mul_pd (__m512d __A, __m512d __B)
10934{
2069d6fc 10935 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
10936}
10937
10938extern __inline __m512d
10939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10940_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10941{
10942 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10943 (__v8df) __B,
10944 (__v8df) __W,
10945 (__mmask8) __U,
10946 _MM_FROUND_CUR_DIRECTION);
10947}
10948
10949extern __inline __m512d
10950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10951_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10952{
10953 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10954 (__v8df) __B,
10955 (__v8df)
10956 _mm512_setzero_pd (),
10957 (__mmask8) __U,
10958 _MM_FROUND_CUR_DIRECTION);
10959}
10960
10961extern __inline __m512
10962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963_mm512_mul_ps (__m512 __A, __m512 __B)
10964{
2069d6fc 10965 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
10966}
10967
10968extern __inline __m512
10969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10970_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10971{
10972 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10973 (__v16sf) __B,
10974 (__v16sf) __W,
10975 (__mmask16) __U,
10976 _MM_FROUND_CUR_DIRECTION);
10977}
10978
10979extern __inline __m512
10980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10981_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10982{
10983 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10984 (__v16sf) __B,
10985 (__v16sf)
10986 _mm512_setzero_ps (),
10987 (__mmask16) __U,
10988 _MM_FROUND_CUR_DIRECTION);
10989}
10990
10991extern __inline __m512d
10992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993_mm512_div_pd (__m512d __M, __m512d __V)
10994{
2069d6fc 10995 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
10996}
10997
10998extern __inline __m512d
10999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11000_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11001{
11002 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11003 (__v8df) __V,
11004 (__v8df) __W,
11005 (__mmask8) __U,
11006 _MM_FROUND_CUR_DIRECTION);
11007}
11008
11009extern __inline __m512d
11010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11011_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11012{
11013 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11014 (__v8df) __V,
11015 (__v8df)
11016 _mm512_setzero_pd (),
11017 (__mmask8) __U,
11018 _MM_FROUND_CUR_DIRECTION);
11019}
11020
11021extern __inline __m512
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm512_div_ps (__m512 __A, __m512 __B)
11024{
2069d6fc 11025 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
11026}
11027
11028extern __inline __m512
11029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11030_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11031{
11032 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11033 (__v16sf) __B,
11034 (__v16sf) __W,
11035 (__mmask16) __U,
11036 _MM_FROUND_CUR_DIRECTION);
11037}
11038
11039extern __inline __m512
11040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11041_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11042{
11043 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11044 (__v16sf) __B,
11045 (__v16sf)
11046 _mm512_setzero_ps (),
11047 (__mmask16) __U,
11048 _MM_FROUND_CUR_DIRECTION);
11049}
11050
11051extern __inline __m512d
11052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11053_mm512_max_pd (__m512d __A, __m512d __B)
11054{
11055 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11056 (__v8df) __B,
11057 (__v8df)
0b192937 11058 _mm512_undefined_pd (),
756c5857
AI
11059 (__mmask8) -1,
11060 _MM_FROUND_CUR_DIRECTION);
11061}
11062
11063extern __inline __m512d
11064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11065_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11066{
11067 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11068 (__v8df) __B,
11069 (__v8df) __W,
11070 (__mmask8) __U,
11071 _MM_FROUND_CUR_DIRECTION);
11072}
11073
11074extern __inline __m512d
11075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11076_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11077{
11078 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11079 (__v8df) __B,
11080 (__v8df)
11081 _mm512_setzero_pd (),
11082 (__mmask8) __U,
11083 _MM_FROUND_CUR_DIRECTION);
11084}
11085
11086extern __inline __m512
11087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11088_mm512_max_ps (__m512 __A, __m512 __B)
11089{
11090 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11091 (__v16sf) __B,
11092 (__v16sf)
0b192937 11093 _mm512_undefined_ps (),
756c5857
AI
11094 (__mmask16) -1,
11095 _MM_FROUND_CUR_DIRECTION);
11096}
11097
11098extern __inline __m512
11099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11100_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11101{
11102 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11103 (__v16sf) __B,
11104 (__v16sf) __W,
11105 (__mmask16) __U,
11106 _MM_FROUND_CUR_DIRECTION);
11107}
11108
11109extern __inline __m512
11110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11111_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11112{
11113 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11114 (__v16sf) __B,
11115 (__v16sf)
11116 _mm512_setzero_ps (),
11117 (__mmask16) __U,
11118 _MM_FROUND_CUR_DIRECTION);
11119}
11120
11121extern __inline __m512d
11122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11123_mm512_min_pd (__m512d __A, __m512d __B)
11124{
11125 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11126 (__v8df) __B,
11127 (__v8df)
0b192937 11128 _mm512_undefined_pd (),
756c5857
AI
11129 (__mmask8) -1,
11130 _MM_FROUND_CUR_DIRECTION);
11131}
11132
11133extern __inline __m512d
11134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11135_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11136{
11137 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11138 (__v8df) __B,
11139 (__v8df) __W,
11140 (__mmask8) __U,
11141 _MM_FROUND_CUR_DIRECTION);
11142}
11143
11144extern __inline __m512d
11145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11146_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11147{
11148 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11149 (__v8df) __B,
11150 (__v8df)
11151 _mm512_setzero_pd (),
11152 (__mmask8) __U,
11153 _MM_FROUND_CUR_DIRECTION);
11154}
11155
11156extern __inline __m512
11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158_mm512_min_ps (__m512 __A, __m512 __B)
11159{
11160 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11161 (__v16sf) __B,
11162 (__v16sf)
0b192937 11163 _mm512_undefined_ps (),
756c5857
AI
11164 (__mmask16) -1,
11165 _MM_FROUND_CUR_DIRECTION);
11166}
11167
11168extern __inline __m512
11169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11171{
11172 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11173 (__v16sf) __B,
11174 (__v16sf) __W,
11175 (__mmask16) __U,
11176 _MM_FROUND_CUR_DIRECTION);
11177}
11178
11179extern __inline __m512
11180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11182{
11183 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11184 (__v16sf) __B,
11185 (__v16sf)
11186 _mm512_setzero_ps (),
11187 (__mmask16) __U,
11188 _MM_FROUND_CUR_DIRECTION);
11189}
11190
11191extern __inline __m512d
11192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11193_mm512_scalef_pd (__m512d __A, __m512d __B)
11194{
11195 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11196 (__v8df) __B,
11197 (__v8df)
0b192937 11198 _mm512_undefined_pd (),
756c5857
AI
11199 (__mmask8) -1,
11200 _MM_FROUND_CUR_DIRECTION);
11201}
11202
11203extern __inline __m512d
11204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11205_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11206{
11207 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11208 (__v8df) __B,
11209 (__v8df) __W,
11210 (__mmask8) __U,
11211 _MM_FROUND_CUR_DIRECTION);
11212}
11213
11214extern __inline __m512d
11215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11216_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11217{
11218 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11219 (__v8df) __B,
11220 (__v8df)
11221 _mm512_setzero_pd (),
11222 (__mmask8) __U,
11223 _MM_FROUND_CUR_DIRECTION);
11224}
11225
11226extern __inline __m512
11227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228_mm512_scalef_ps (__m512 __A, __m512 __B)
11229{
11230 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11231 (__v16sf) __B,
11232 (__v16sf)
0b192937 11233 _mm512_undefined_ps (),
756c5857
AI
11234 (__mmask16) -1,
11235 _MM_FROUND_CUR_DIRECTION);
11236}
11237
11238extern __inline __m512
11239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11241{
11242 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11243 (__v16sf) __B,
11244 (__v16sf) __W,
11245 (__mmask16) __U,
11246 _MM_FROUND_CUR_DIRECTION);
11247}
11248
11249extern __inline __m512
11250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11251_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11252{
11253 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11254 (__v16sf) __B,
11255 (__v16sf)
11256 _mm512_setzero_ps (),
11257 (__mmask16) __U,
11258 _MM_FROUND_CUR_DIRECTION);
11259}
11260
075691af
AI
11261extern __inline __m128d
11262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11263_mm_scalef_sd (__m128d __A, __m128d __B)
11264{
11265 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11266 (__v2df) __B,
11267 _MM_FROUND_CUR_DIRECTION);
11268}
11269
11270extern __inline __m128
11271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11272_mm_scalef_ss (__m128 __A, __m128 __B)
11273{
11274 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11275 (__v4sf) __B,
11276 _MM_FROUND_CUR_DIRECTION);
11277}
11278
756c5857
AI
11279extern __inline __m512d
11280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11282{
11283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11284 (__v8df) __B,
11285 (__v8df) __C,
11286 (__mmask8) -1,
11287 _MM_FROUND_CUR_DIRECTION);
11288}
11289
11290extern __inline __m512d
11291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11293{
11294 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11295 (__v8df) __B,
11296 (__v8df) __C,
11297 (__mmask8) __U,
11298 _MM_FROUND_CUR_DIRECTION);
11299}
11300
11301extern __inline __m512d
11302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11303_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11304{
11305 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11306 (__v8df) __B,
11307 (__v8df) __C,
11308 (__mmask8) __U,
11309 _MM_FROUND_CUR_DIRECTION);
11310}
11311
11312extern __inline __m512d
11313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11315{
11316 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11317 (__v8df) __B,
11318 (__v8df) __C,
11319 (__mmask8) __U,
11320 _MM_FROUND_CUR_DIRECTION);
11321}
11322
11323extern __inline __m512
11324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11325_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11326{
11327 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11328 (__v16sf) __B,
11329 (__v16sf) __C,
11330 (__mmask16) -1,
11331 _MM_FROUND_CUR_DIRECTION);
11332}
11333
11334extern __inline __m512
11335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11337{
11338 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11339 (__v16sf) __B,
11340 (__v16sf) __C,
11341 (__mmask16) __U,
11342 _MM_FROUND_CUR_DIRECTION);
11343}
11344
11345extern __inline __m512
11346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11348{
11349 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11350 (__v16sf) __B,
11351 (__v16sf) __C,
11352 (__mmask16) __U,
11353 _MM_FROUND_CUR_DIRECTION);
11354}
11355
11356extern __inline __m512
11357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11358_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11359{
11360 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11361 (__v16sf) __B,
11362 (__v16sf) __C,
11363 (__mmask16) __U,
11364 _MM_FROUND_CUR_DIRECTION);
11365}
11366
11367extern __inline __m512d
11368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11369_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11370{
11371 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11372 (__v8df) __B,
11373 -(__v8df) __C,
11374 (__mmask8) -1,
11375 _MM_FROUND_CUR_DIRECTION);
11376}
11377
11378extern __inline __m512d
11379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11381{
11382 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11383 (__v8df) __B,
11384 -(__v8df) __C,
11385 (__mmask8) __U,
11386 _MM_FROUND_CUR_DIRECTION);
11387}
11388
11389extern __inline __m512d
11390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11392{
11393 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11394 (__v8df) __B,
11395 (__v8df) __C,
11396 (__mmask8) __U,
11397 _MM_FROUND_CUR_DIRECTION);
11398}
11399
11400extern __inline __m512d
11401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11403{
11404 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11405 (__v8df) __B,
11406 -(__v8df) __C,
11407 (__mmask8) __U,
11408 _MM_FROUND_CUR_DIRECTION);
11409}
11410
11411extern __inline __m512
11412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11413_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11414{
11415 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11416 (__v16sf) __B,
11417 -(__v16sf) __C,
11418 (__mmask16) -1,
11419 _MM_FROUND_CUR_DIRECTION);
11420}
11421
11422extern __inline __m512
11423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11424_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11425{
11426 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11427 (__v16sf) __B,
11428 -(__v16sf) __C,
11429 (__mmask16) __U,
11430 _MM_FROUND_CUR_DIRECTION);
11431}
11432
11433extern __inline __m512
11434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11435_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11436{
11437 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11438 (__v16sf) __B,
11439 (__v16sf) __C,
11440 (__mmask16) __U,
11441 _MM_FROUND_CUR_DIRECTION);
11442}
11443
11444extern __inline __m512
11445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11446_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11447{
11448 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11449 (__v16sf) __B,
11450 -(__v16sf) __C,
11451 (__mmask16) __U,
11452 _MM_FROUND_CUR_DIRECTION);
11453}
11454
11455extern __inline __m512d
11456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11458{
11459 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11460 (__v8df) __B,
11461 (__v8df) __C,
11462 (__mmask8) -1,
11463 _MM_FROUND_CUR_DIRECTION);
11464}
11465
11466extern __inline __m512d
11467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11468_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11469{
11470 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11471 (__v8df) __B,
11472 (__v8df) __C,
11473 (__mmask8) __U,
11474 _MM_FROUND_CUR_DIRECTION);
11475}
11476
11477extern __inline __m512d
11478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11479_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11480{
11481 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11482 (__v8df) __B,
11483 (__v8df) __C,
11484 (__mmask8) __U,
11485 _MM_FROUND_CUR_DIRECTION);
11486}
11487
11488extern __inline __m512d
11489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11490_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11491{
11492 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11493 (__v8df) __B,
11494 (__v8df) __C,
11495 (__mmask8) __U,
11496 _MM_FROUND_CUR_DIRECTION);
11497}
11498
11499extern __inline __m512
11500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11501_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11502{
11503 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11504 (__v16sf) __B,
11505 (__v16sf) __C,
11506 (__mmask16) -1,
11507 _MM_FROUND_CUR_DIRECTION);
11508}
11509
11510extern __inline __m512
11511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11513{
11514 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11515 (__v16sf) __B,
11516 (__v16sf) __C,
11517 (__mmask16) __U,
11518 _MM_FROUND_CUR_DIRECTION);
11519}
11520
11521extern __inline __m512
11522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11523_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11524{
11525 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11526 (__v16sf) __B,
11527 (__v16sf) __C,
11528 (__mmask16) __U,
11529 _MM_FROUND_CUR_DIRECTION);
11530}
11531
11532extern __inline __m512
11533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11535{
11536 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11537 (__v16sf) __B,
11538 (__v16sf) __C,
11539 (__mmask16) __U,
11540 _MM_FROUND_CUR_DIRECTION);
11541}
11542
11543extern __inline __m512d
11544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11546{
11547 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11548 (__v8df) __B,
11549 -(__v8df) __C,
11550 (__mmask8) -1,
11551 _MM_FROUND_CUR_DIRECTION);
11552}
11553
11554extern __inline __m512d
11555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11557{
11558 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11559 (__v8df) __B,
11560 -(__v8df) __C,
11561 (__mmask8) __U,
11562 _MM_FROUND_CUR_DIRECTION);
11563}
11564
11565extern __inline __m512d
11566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11568{
11569 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11570 (__v8df) __B,
11571 (__v8df) __C,
11572 (__mmask8) __U,
11573 _MM_FROUND_CUR_DIRECTION);
11574}
11575
11576extern __inline __m512d
11577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11579{
11580 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11581 (__v8df) __B,
11582 -(__v8df) __C,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11585}
11586
11587extern __inline __m512
11588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11590{
11591 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11592 (__v16sf) __B,
11593 -(__v16sf) __C,
11594 (__mmask16) -1,
11595 _MM_FROUND_CUR_DIRECTION);
11596}
11597
11598extern __inline __m512
11599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11601{
11602 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11603 (__v16sf) __B,
11604 -(__v16sf) __C,
11605 (__mmask16) __U,
11606 _MM_FROUND_CUR_DIRECTION);
11607}
11608
11609extern __inline __m512
11610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11611_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11612{
11613 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11614 (__v16sf) __B,
11615 (__v16sf) __C,
11616 (__mmask16) __U,
11617 _MM_FROUND_CUR_DIRECTION);
11618}
11619
11620extern __inline __m512
11621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11623{
11624 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11625 (__v16sf) __B,
11626 -(__v16sf) __C,
11627 (__mmask16) __U,
11628 _MM_FROUND_CUR_DIRECTION);
11629}
11630
11631extern __inline __m512d
11632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11634{
11635 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11636 (__v8df) __B,
11637 (__v8df) __C,
11638 (__mmask8) -1,
11639 _MM_FROUND_CUR_DIRECTION);
11640}
11641
11642extern __inline __m512d
11643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11644_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11645{
11646 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11647 (__v8df) __B,
11648 (__v8df) __C,
11649 (__mmask8) __U,
11650 _MM_FROUND_CUR_DIRECTION);
11651}
11652
11653extern __inline __m512d
11654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11655_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11656{
11657 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11658 (__v8df) __B,
11659 (__v8df) __C,
11660 (__mmask8) __U,
11661 _MM_FROUND_CUR_DIRECTION);
11662}
11663
11664extern __inline __m512d
11665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11666_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11667{
11668 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11669 (__v8df) __B,
11670 (__v8df) __C,
11671 (__mmask8) __U,
11672 _MM_FROUND_CUR_DIRECTION);
11673}
11674
11675extern __inline __m512
11676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11677_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11678{
11679 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11680 (__v16sf) __B,
11681 (__v16sf) __C,
11682 (__mmask16) -1,
11683 _MM_FROUND_CUR_DIRECTION);
11684}
11685
11686extern __inline __m512
11687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11689{
11690 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11691 (__v16sf) __B,
11692 (__v16sf) __C,
11693 (__mmask16) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11695}
11696
11697extern __inline __m512
11698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11700{
11701 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11702 (__v16sf) __B,
11703 (__v16sf) __C,
11704 (__mmask16) __U,
11705 _MM_FROUND_CUR_DIRECTION);
11706}
11707
11708extern __inline __m512
11709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11711{
11712 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11713 (__v16sf) __B,
11714 (__v16sf) __C,
11715 (__mmask16) __U,
11716 _MM_FROUND_CUR_DIRECTION);
11717}
11718
11719extern __inline __m512d
11720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11722{
11723 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11724 (__v8df) __B,
11725 -(__v8df) __C,
11726 (__mmask8) -1,
11727 _MM_FROUND_CUR_DIRECTION);
11728}
11729
11730extern __inline __m512d
11731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11733{
11734 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11735 (__v8df) __B,
11736 (__v8df) __C,
11737 (__mmask8) __U,
11738 _MM_FROUND_CUR_DIRECTION);
11739}
11740
11741extern __inline __m512d
11742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11744{
11745 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11746 (__v8df) __B,
11747 (__v8df) __C,
11748 (__mmask8) __U,
11749 _MM_FROUND_CUR_DIRECTION);
11750}
11751
11752extern __inline __m512d
11753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11754_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11755{
11756 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11757 (__v8df) __B,
11758 -(__v8df) __C,
11759 (__mmask8) __U,
11760 _MM_FROUND_CUR_DIRECTION);
11761}
11762
11763extern __inline __m512
11764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11765_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11766{
11767 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11768 (__v16sf) __B,
11769 -(__v16sf) __C,
11770 (__mmask16) -1,
11771 _MM_FROUND_CUR_DIRECTION);
11772}
11773
11774extern __inline __m512
11775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11777{
11778 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11779 (__v16sf) __B,
11780 (__v16sf) __C,
11781 (__mmask16) __U,
11782 _MM_FROUND_CUR_DIRECTION);
11783}
11784
11785extern __inline __m512
11786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11788{
11789 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11790 (__v16sf) __B,
11791 (__v16sf) __C,
11792 (__mmask16) __U,
11793 _MM_FROUND_CUR_DIRECTION);
11794}
11795
11796extern __inline __m512
11797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11798_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11799{
11800 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11801 (__v16sf) __B,
11802 -(__v16sf) __C,
11803 (__mmask16) __U,
11804 _MM_FROUND_CUR_DIRECTION);
11805}
11806
11807extern __inline __m256i
11808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11809_mm512_cvttpd_epi32 (__m512d __A)
11810{
11811 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11812 (__v8si)
0b192937 11813 _mm256_undefined_si256 (),
756c5857
AI
11814 (__mmask8) -1,
11815 _MM_FROUND_CUR_DIRECTION);
11816}
11817
11818extern __inline __m256i
11819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11820_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11821{
11822 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11823 (__v8si) __W,
11824 (__mmask8) __U,
11825 _MM_FROUND_CUR_DIRECTION);
11826}
11827
11828extern __inline __m256i
11829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11830_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11831{
11832 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11833 (__v8si)
11834 _mm256_setzero_si256 (),
11835 (__mmask8) __U,
11836 _MM_FROUND_CUR_DIRECTION);
11837}
11838
11839extern __inline __m256i
11840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841_mm512_cvttpd_epu32 (__m512d __A)
11842{
11843 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11844 (__v8si)
0b192937 11845 _mm256_undefined_si256 (),
756c5857
AI
11846 (__mmask8) -1,
11847 _MM_FROUND_CUR_DIRECTION);
11848}
11849
11850extern __inline __m256i
11851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11852_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11853{
11854 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11855 (__v8si) __W,
11856 (__mmask8) __U,
11857 _MM_FROUND_CUR_DIRECTION);
11858}
11859
11860extern __inline __m256i
11861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11862_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11863{
11864 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11865 (__v8si)
11866 _mm256_setzero_si256 (),
11867 (__mmask8) __U,
11868 _MM_FROUND_CUR_DIRECTION);
11869}
11870
11871extern __inline __m256i
11872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11873_mm512_cvtpd_epi32 (__m512d __A)
11874{
11875 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11876 (__v8si)
0b192937 11877 _mm256_undefined_si256 (),
756c5857
AI
11878 (__mmask8) -1,
11879 _MM_FROUND_CUR_DIRECTION);
11880}
11881
11882extern __inline __m256i
11883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11885{
11886 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11887 (__v8si) __W,
11888 (__mmask8) __U,
11889 _MM_FROUND_CUR_DIRECTION);
11890}
11891
11892extern __inline __m256i
11893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11894_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11895{
11896 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11897 (__v8si)
11898 _mm256_setzero_si256 (),
11899 (__mmask8) __U,
11900 _MM_FROUND_CUR_DIRECTION);
11901}
11902
11903extern __inline __m256i
11904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11905_mm512_cvtpd_epu32 (__m512d __A)
11906{
11907 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11908 (__v8si)
0b192937 11909 _mm256_undefined_si256 (),
756c5857
AI
11910 (__mmask8) -1,
11911 _MM_FROUND_CUR_DIRECTION);
11912}
11913
11914extern __inline __m256i
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11917{
11918 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11919 (__v8si) __W,
11920 (__mmask8) __U,
11921 _MM_FROUND_CUR_DIRECTION);
11922}
11923
11924extern __inline __m256i
11925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11927{
11928 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11929 (__v8si)
11930 _mm256_setzero_si256 (),
11931 (__mmask8) __U,
11932 _MM_FROUND_CUR_DIRECTION);
11933}
11934
11935extern __inline __m512i
11936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11937_mm512_cvttps_epi32 (__m512 __A)
11938{
11939 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11940 (__v16si)
4271e5cb 11941 _mm512_undefined_epi32 (),
756c5857
AI
11942 (__mmask16) -1,
11943 _MM_FROUND_CUR_DIRECTION);
11944}
11945
11946extern __inline __m512i
11947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11949{
11950 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11951 (__v16si) __W,
11952 (__mmask16) __U,
11953 _MM_FROUND_CUR_DIRECTION);
11954}
11955
11956extern __inline __m512i
11957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11959{
11960 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11961 (__v16si)
11962 _mm512_setzero_si512 (),
11963 (__mmask16) __U,
11964 _MM_FROUND_CUR_DIRECTION);
11965}
11966
11967extern __inline __m512i
11968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969_mm512_cvttps_epu32 (__m512 __A)
11970{
11971 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11972 (__v16si)
4271e5cb 11973 _mm512_undefined_epi32 (),
756c5857
AI
11974 (__mmask16) -1,
11975 _MM_FROUND_CUR_DIRECTION);
11976}
11977
11978extern __inline __m512i
11979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11980_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11981{
11982 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11983 (__v16si) __W,
11984 (__mmask16) __U,
11985 _MM_FROUND_CUR_DIRECTION);
11986}
11987
11988extern __inline __m512i
11989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11991{
11992 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11993 (__v16si)
11994 _mm512_setzero_si512 (),
11995 (__mmask16) __U,
11996 _MM_FROUND_CUR_DIRECTION);
11997}
11998
11999extern __inline __m512i
12000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001_mm512_cvtps_epi32 (__m512 __A)
12002{
12003 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12004 (__v16si)
4271e5cb 12005 _mm512_undefined_epi32 (),
756c5857
AI
12006 (__mmask16) -1,
12007 _MM_FROUND_CUR_DIRECTION);
12008}
12009
12010extern __inline __m512i
12011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12013{
12014 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12015 (__v16si) __W,
12016 (__mmask16) __U,
12017 _MM_FROUND_CUR_DIRECTION);
12018}
12019
12020extern __inline __m512i
12021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12023{
12024 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12025 (__v16si)
12026 _mm512_setzero_si512 (),
12027 (__mmask16) __U,
12028 _MM_FROUND_CUR_DIRECTION);
12029}
12030
12031extern __inline __m512i
12032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12033_mm512_cvtps_epu32 (__m512 __A)
12034{
12035 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12036 (__v16si)
4271e5cb 12037 _mm512_undefined_epi32 (),
756c5857
AI
12038 (__mmask16) -1,
12039 _MM_FROUND_CUR_DIRECTION);
12040}
12041
12042extern __inline __m512i
12043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12045{
12046 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12047 (__v16si) __W,
12048 (__mmask16) __U,
12049 _MM_FROUND_CUR_DIRECTION);
12050}
12051
12052extern __inline __m512i
12053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12055{
12056 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12057 (__v16si)
12058 _mm512_setzero_si512 (),
12059 (__mmask16) __U,
12060 _MM_FROUND_CUR_DIRECTION);
12061}
12062
dcb2c527
JJ
12063extern __inline double
12064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12065_mm512_cvtsd_f64 (__m512d __A)
12066{
12067 return __A[0];
12068}
12069
12070extern __inline float
12071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072_mm512_cvtss_f32 (__m512 __A)
12073{
12074 return __A[0];
12075}
12076
756c5857
AI
12077#ifdef __x86_64__
12078extern __inline __m128
12079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12081{
12082 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12083 _MM_FROUND_CUR_DIRECTION);
12084}
12085
12086extern __inline __m128d
12087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12088_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12089{
12090 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12091 _MM_FROUND_CUR_DIRECTION);
12092}
12093#endif
12094
12095extern __inline __m128
12096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12097_mm_cvtu32_ss (__m128 __A, unsigned __B)
12098{
12099 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12100 _MM_FROUND_CUR_DIRECTION);
12101}
12102
12103extern __inline __m512
12104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12105_mm512_cvtepi32_ps (__m512i __A)
12106{
12107 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12108 (__v16sf)
0b192937 12109 _mm512_undefined_ps (),
756c5857
AI
12110 (__mmask16) -1,
12111 _MM_FROUND_CUR_DIRECTION);
12112}
12113
12114extern __inline __m512
12115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12117{
12118 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12119 (__v16sf) __W,
12120 (__mmask16) __U,
12121 _MM_FROUND_CUR_DIRECTION);
12122}
12123
12124extern __inline __m512
12125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12126_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12127{
12128 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12129 (__v16sf)
12130 _mm512_setzero_ps (),
12131 (__mmask16) __U,
12132 _MM_FROUND_CUR_DIRECTION);
12133}
12134
12135extern __inline __m512
12136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12137_mm512_cvtepu32_ps (__m512i __A)
12138{
12139 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12140 (__v16sf)
0b192937 12141 _mm512_undefined_ps (),
756c5857
AI
12142 (__mmask16) -1,
12143 _MM_FROUND_CUR_DIRECTION);
12144}
12145
12146extern __inline __m512
12147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12148_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12149{
12150 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12151 (__v16sf) __W,
12152 (__mmask16) __U,
12153 _MM_FROUND_CUR_DIRECTION);
12154}
12155
12156extern __inline __m512
12157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12158_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12159{
12160 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12161 (__v16sf)
12162 _mm512_setzero_ps (),
12163 (__mmask16) __U,
12164 _MM_FROUND_CUR_DIRECTION);
12165}
12166
12167#ifdef __OPTIMIZE__
12168extern __inline __m512d
12169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12170_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12171{
12172 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12173 (__v8df) __B,
12174 (__v8di) __C,
12175 __imm,
12176 (__mmask8) -1,
12177 _MM_FROUND_CUR_DIRECTION);
12178}
12179
12180extern __inline __m512d
12181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12182_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12183 __m512i __C, const int __imm)
12184{
12185 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12186 (__v8df) __B,
12187 (__v8di) __C,
12188 __imm,
12189 (__mmask8) __U,
12190 _MM_FROUND_CUR_DIRECTION);
12191}
12192
12193extern __inline __m512d
12194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12195_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12196 __m512i __C, const int __imm)
12197{
12198 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12199 (__v8df) __B,
12200 (__v8di) __C,
12201 __imm,
12202 (__mmask8) __U,
12203 _MM_FROUND_CUR_DIRECTION);
12204}
12205
12206extern __inline __m512
12207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12209{
12210 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12211 (__v16sf) __B,
12212 (__v16si) __C,
12213 __imm,
12214 (__mmask16) -1,
12215 _MM_FROUND_CUR_DIRECTION);
12216}
12217
12218extern __inline __m512
12219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12220_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12221 __m512i __C, const int __imm)
12222{
12223 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12224 (__v16sf) __B,
12225 (__v16si) __C,
12226 __imm,
12227 (__mmask16) __U,
12228 _MM_FROUND_CUR_DIRECTION);
12229}
12230
12231extern __inline __m512
12232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12233_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12234 __m512i __C, const int __imm)
12235{
12236 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12237 (__v16sf) __B,
12238 (__v16si) __C,
12239 __imm,
12240 (__mmask16) __U,
12241 _MM_FROUND_CUR_DIRECTION);
12242}
12243
12244extern __inline __m128d
12245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12247{
12248 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12249 (__v2df) __B,
12250 (__v2di) __C, __imm,
12251 (__mmask8) -1,
12252 _MM_FROUND_CUR_DIRECTION);
12253}
12254
12255extern __inline __m128d
12256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12257_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12258 __m128i __C, const int __imm)
12259{
12260 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12261 (__v2df) __B,
12262 (__v2di) __C, __imm,
12263 (__mmask8) __U,
12264 _MM_FROUND_CUR_DIRECTION);
12265}
12266
12267extern __inline __m128d
12268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12270 __m128i __C, const int __imm)
12271{
12272 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12273 (__v2df) __B,
12274 (__v2di) __C,
12275 __imm,
12276 (__mmask8) __U,
12277 _MM_FROUND_CUR_DIRECTION);
12278}
12279
12280extern __inline __m128
12281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12283{
12284 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12285 (__v4sf) __B,
12286 (__v4si) __C, __imm,
12287 (__mmask8) -1,
12288 _MM_FROUND_CUR_DIRECTION);
12289}
12290
12291extern __inline __m128
12292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12294 __m128i __C, const int __imm)
12295{
12296 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12297 (__v4sf) __B,
12298 (__v4si) __C, __imm,
12299 (__mmask8) __U,
12300 _MM_FROUND_CUR_DIRECTION);
12301}
12302
12303extern __inline __m128
12304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12305_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12306 __m128i __C, const int __imm)
12307{
12308 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12309 (__v4sf) __B,
12310 (__v4si) __C, __imm,
12311 (__mmask8) __U,
12312 _MM_FROUND_CUR_DIRECTION);
12313}
12314#else
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_fixupimm_pd(X, Y, Z, C)					\
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
					       (__v8df)(__m512d)(Y),	\
					       (__v8di)(__m512i)(Z),	\
					       (int)(C),		\
					       (__mmask8)(-1),		\
					       _MM_FROUND_CUR_DIRECTION))
12319
/* Macro form used when __OPTIMIZE__ is not defined; U is the mask.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)				\
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
					       (__v8df)(__m512d)(Y),	\
					       (__v8di)(__m512i)(Z),	\
					       (int)(C),		\
					       (__mmask8)(U),		\
					       _MM_FROUND_CUR_DIRECTION))
12324
/* Macro form used when __OPTIMIZE__ is not defined; expands to the
   _maskz builtin with U as the mask.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)				\
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),	\
						(__v8df)(__m512d)(Y),	\
						(__v8di)(__m512i)(Z),	\
						(int)(C),		\
						(__mmask8)(U),		\
						_MM_FROUND_CUR_DIRECTION))
12329
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_fixupimm_ps(X, Y, Z, C)					\
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
					      (__v16sf)(__m512)(Y),	\
					      (__v16si)(__m512i)(Z),	\
					      (int)(C),			\
					      (__mmask16)(-1),		\
					      _MM_FROUND_CUR_DIRECTION))
12334
/* Macro form used when __OPTIMIZE__ is not defined; U is the mask.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)				\
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
					      (__v16sf)(__m512)(Y),	\
					      (__v16si)(__m512i)(Z),	\
					      (int)(C),			\
					      (__mmask16)(U),		\
					      _MM_FROUND_CUR_DIRECTION))
12339
/* Macro form used when __OPTIMIZE__ is not defined; expands to the
   _maskz builtin with U as the mask.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)				\
  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),	\
					       (__v16sf)(__m512)(Y),	\
					       (__v16si)(__m512i)(Z),	\
					       (int)(C),		\
					       (__mmask16)(U),		\
					       _MM_FROUND_CUR_DIRECTION))
12344
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm_fixupimm_sd(X, Y, Z, C)					\
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
					    (__v2df)(__m128d)(Y),	\
					    (__v2di)(__m128i)(Z),	\
					    (int)(C),			\
					    (__mmask8)(-1),		\
					    _MM_FROUND_CUR_DIRECTION))
12349
/* Macro form used when __OPTIMIZE__ is not defined; U is the mask.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C)				\
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
					    (__v2df)(__m128d)(Y),	\
					    (__v2di)(__m128i)(Z),	\
					    (int)(C),			\
					    (__mmask8)(U),		\
					    _MM_FROUND_CUR_DIRECTION))
12354
/* Macro form used when __OPTIMIZE__ is not defined; expands to the
   _maskz builtin with U as the mask.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C)				\
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
					     (__v2df)(__m128d)(Y),	\
					     (__v2di)(__m128i)(Z),	\
					     (int)(C),			\
					     (__mmask8)(U),		\
					     _MM_FROUND_CUR_DIRECTION))
12359
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm_fixupimm_ss(X, Y, Z, C)					\
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
					   (__v4sf)(__m128)(Y),		\
					   (__v4si)(__m128i)(Z),	\
					   (int)(C),			\
					   (__mmask8)(-1),		\
					   _MM_FROUND_CUR_DIRECTION))
12364
/* Macro form used when __OPTIMIZE__ is not defined; U is the mask.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C)				\
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
					   (__v4sf)(__m128)(Y),		\
					   (__v4si)(__m128i)(Z),	\
					   (int)(C),			\
					   (__mmask8)(U),		\
					   _MM_FROUND_CUR_DIRECTION))
12369
/* Macro form used when __OPTIMIZE__ is not defined; expands to the
   _maskz builtin with U as the mask.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C)				\
  ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
					    (__v4sf)(__m128)(Y),	\
					    (__v4si)(__m128i)(Z),	\
					    (int)(C),			\
					    (__mmask8)(U),		\
					    _MM_FROUND_CUR_DIRECTION))
12374#endif
12375
12376#ifdef __x86_64__
12377extern __inline unsigned long long
12378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379_mm_cvtss_u64 (__m128 __A)
12380{
12381 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12382 __A,
12383 _MM_FROUND_CUR_DIRECTION);
12384}
12385
12386extern __inline unsigned long long
12387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388_mm_cvttss_u64 (__m128 __A)
12389{
12390 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12391 __A,
12392 _MM_FROUND_CUR_DIRECTION);
12393}
12394
12395extern __inline long long
12396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12397_mm_cvttss_i64 (__m128 __A)
12398{
12399 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12400 _MM_FROUND_CUR_DIRECTION);
12401}
12402#endif /* __x86_64__ */
12403
12404extern __inline unsigned
12405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406_mm_cvtss_u32 (__m128 __A)
12407{
12408 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12409 _MM_FROUND_CUR_DIRECTION);
12410}
12411
12412extern __inline unsigned
12413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12414_mm_cvttss_u32 (__m128 __A)
12415{
12416 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12417 _MM_FROUND_CUR_DIRECTION);
12418}
12419
12420extern __inline int
12421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12422_mm_cvttss_i32 (__m128 __A)
12423{
12424 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12425 _MM_FROUND_CUR_DIRECTION);
12426}
12427
12428#ifdef __x86_64__
12429extern __inline unsigned long long
12430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12431_mm_cvtsd_u64 (__m128d __A)
12432{
12433 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12434 __A,
12435 _MM_FROUND_CUR_DIRECTION);
12436}
12437
12438extern __inline unsigned long long
12439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12440_mm_cvttsd_u64 (__m128d __A)
12441{
12442 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12443 __A,
12444 _MM_FROUND_CUR_DIRECTION);
12445}
12446
12447extern __inline long long
12448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12449_mm_cvttsd_i64 (__m128d __A)
12450{
12451 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12452 _MM_FROUND_CUR_DIRECTION);
12453}
12454#endif /* __x86_64__ */
12455
12456extern __inline unsigned
12457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12458_mm_cvtsd_u32 (__m128d __A)
12459{
12460 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12461 _MM_FROUND_CUR_DIRECTION);
12462}
12463
12464extern __inline unsigned
12465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12466_mm_cvttsd_u32 (__m128d __A)
12467{
12468 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12469 _MM_FROUND_CUR_DIRECTION);
12470}
12471
12472extern __inline int
12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474_mm_cvttsd_i32 (__m128d __A)
12475{
12476 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12477 _MM_FROUND_CUR_DIRECTION);
12478}
12479
12480extern __inline __m512d
12481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12482_mm512_cvtps_pd (__m256 __A)
12483{
12484 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12485 (__v8df)
0b192937 12486 _mm512_undefined_pd (),
756c5857
AI
12487 (__mmask8) -1,
12488 _MM_FROUND_CUR_DIRECTION);
12489}
12490
12491extern __inline __m512d
12492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12493_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12494{
12495 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12496 (__v8df) __W,
12497 (__mmask8) __U,
12498 _MM_FROUND_CUR_DIRECTION);
12499}
12500
12501extern __inline __m512d
12502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12503_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12504{
12505 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12506 (__v8df)
12507 _mm512_setzero_pd (),
12508 (__mmask8) __U,
12509 _MM_FROUND_CUR_DIRECTION);
12510}
12511
12512extern __inline __m512
12513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514_mm512_cvtph_ps (__m256i __A)
12515{
12516 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12517 (__v16sf)
0b192937 12518 _mm512_undefined_ps (),
756c5857
AI
12519 (__mmask16) -1,
12520 _MM_FROUND_CUR_DIRECTION);
12521}
12522
12523extern __inline __m512
12524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12525_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12526{
12527 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12528 (__v16sf) __W,
12529 (__mmask16) __U,
12530 _MM_FROUND_CUR_DIRECTION);
12531}
12532
12533extern __inline __m512
12534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12535_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12536{
12537 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12538 (__v16sf)
12539 _mm512_setzero_ps (),
12540 (__mmask16) __U,
12541 _MM_FROUND_CUR_DIRECTION);
12542}
12543
12544extern __inline __m256
12545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12546_mm512_cvtpd_ps (__m512d __A)
12547{
12548 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12549 (__v8sf)
0b192937 12550 _mm256_undefined_ps (),
756c5857
AI
12551 (__mmask8) -1,
12552 _MM_FROUND_CUR_DIRECTION);
12553}
12554
12555extern __inline __m256
12556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12557_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12558{
12559 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12560 (__v8sf) __W,
12561 (__mmask8) __U,
12562 _MM_FROUND_CUR_DIRECTION);
12563}
12564
12565extern __inline __m256
12566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12567_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12568{
12569 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12570 (__v8sf)
12571 _mm256_setzero_ps (),
12572 (__mmask8) __U,
12573 _MM_FROUND_CUR_DIRECTION);
12574}
12575
12576#ifdef __OPTIMIZE__
12577extern __inline __m512
12578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12579_mm512_getexp_ps (__m512 __A)
12580{
12581 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12582 (__v16sf)
0b192937 12583 _mm512_undefined_ps (),
756c5857
AI
12584 (__mmask16) -1,
12585 _MM_FROUND_CUR_DIRECTION);
12586}
12587
12588extern __inline __m512
12589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12590_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12591{
12592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12593 (__v16sf) __W,
12594 (__mmask16) __U,
12595 _MM_FROUND_CUR_DIRECTION);
12596}
12597
12598extern __inline __m512
12599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12600_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12601{
12602 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12603 (__v16sf)
12604 _mm512_setzero_ps (),
12605 (__mmask16) __U,
12606 _MM_FROUND_CUR_DIRECTION);
12607}
12608
12609extern __inline __m512d
12610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12611_mm512_getexp_pd (__m512d __A)
12612{
12613 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12614 (__v8df)
0b192937 12615 _mm512_undefined_pd (),
756c5857
AI
12616 (__mmask8) -1,
12617 _MM_FROUND_CUR_DIRECTION);
12618}
12619
12620extern __inline __m512d
12621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12622_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12623{
12624 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12625 (__v8df) __W,
12626 (__mmask8) __U,
12627 _MM_FROUND_CUR_DIRECTION);
12628}
12629
12630extern __inline __m512d
12631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12633{
12634 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12635 (__v8df)
12636 _mm512_setzero_pd (),
12637 (__mmask8) __U,
12638 _MM_FROUND_CUR_DIRECTION);
12639}
12640
075691af
AI
12641extern __inline __m128
12642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643_mm_getexp_ss (__m128 __A, __m128 __B)
12644{
12645 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12646 (__v4sf) __B,
12647 _MM_FROUND_CUR_DIRECTION);
12648}
12649
12650extern __inline __m128d
12651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12652_mm_getexp_sd (__m128d __A, __m128d __B)
12653{
12654 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12655 (__v2df) __B,
12656 _MM_FROUND_CUR_DIRECTION);
12657}
12658
756c5857
AI
12659extern __inline __m512d
12660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12661_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12662 _MM_MANTISSA_SIGN_ENUM __C)
12663{
12664 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12665 (__C << 2) | __B,
0b192937 12666 _mm512_undefined_pd (),
756c5857
AI
12667 (__mmask8) -1,
12668 _MM_FROUND_CUR_DIRECTION);
12669}
12670
12671extern __inline __m512d
12672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12673_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12674 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12675{
12676 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12677 (__C << 2) | __B,
12678 (__v8df) __W, __U,
12679 _MM_FROUND_CUR_DIRECTION);
12680}
12681
12682extern __inline __m512d
12683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12684_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12685 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12686{
12687 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12688 (__C << 2) | __B,
12689 (__v8df)
12690 _mm512_setzero_pd (),
12691 __U,
12692 _MM_FROUND_CUR_DIRECTION);
12693}
12694
12695extern __inline __m512
12696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12697_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12698 _MM_MANTISSA_SIGN_ENUM __C)
12699{
12700 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12701 (__C << 2) | __B,
0b192937 12702 _mm512_undefined_ps (),
756c5857
AI
12703 (__mmask16) -1,
12704 _MM_FROUND_CUR_DIRECTION);
12705}
12706
12707extern __inline __m512
12708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12709_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12710 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12711{
12712 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12713 (__C << 2) | __B,
12714 (__v16sf) __W, __U,
12715 _MM_FROUND_CUR_DIRECTION);
12716}
12717
12718extern __inline __m512
12719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12720_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12721 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12722{
12723 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12724 (__C << 2) | __B,
12725 (__v16sf)
12726 _mm512_setzero_ps (),
12727 __U,
12728 _MM_FROUND_CUR_DIRECTION);
12729}
12730
075691af
AI
12731extern __inline __m128d
12732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12733_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12734 _MM_MANTISSA_SIGN_ENUM __D)
12735{
12736 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12737 (__v2df) __B,
12738 (__D << 2) | __C,
12739 _MM_FROUND_CUR_DIRECTION);
12740}
12741
12742extern __inline __m128
12743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12745 _MM_MANTISSA_SIGN_ENUM __D)
12746{
12747 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12748 (__v4sf) __B,
12749 (__D << 2) | __C,
12750 _MM_FROUND_CUR_DIRECTION);
12751}
12752
756c5857
AI
12753#else
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_getmant_pd(X, B, C)					\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v8df)_mm512_undefined_pd(),					\
     (__mmask8)-1,							\
     _MM_FROUND_CUR_DIRECTION))
12760
/* Macro form used when __OPTIMIZE__ is not defined; W is the third builtin
   operand and U the mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C)				\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v8df)(__m512d)(W),						\
     (__mmask8)(U),							\
     _MM_FROUND_CUR_DIRECTION))
12767
/* Macro form used when __OPTIMIZE__ is not defined; a zero vector is the
   third builtin operand and U the mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C)				\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v8df)_mm512_setzero_pd(),					\
     (__mmask8)(U),							\
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_getmant_ps(X, B, C)					\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v16sf)_mm512_undefined_ps(),					\
     (__mmask16)-1,							\
     _MM_FROUND_CUR_DIRECTION))
12780
/* Macro form used when __OPTIMIZE__ is not defined; W is the third builtin
   operand and U the mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C)				\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v16sf)(__m512)(W),						\
     (__mmask16)(U),							\
     _MM_FROUND_CUR_DIRECTION))
12787
/* Macro form used when __OPTIMIZE__ is not defined; a zero vector is the
   third builtin operand and U the mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C)				\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X),						\
     (int)(((C)<<2) | (B)),						\
     (__v16sf)_mm512_setzero_ps(),					\
     (__mmask16)(U),							\
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro form used when __OPTIMIZE__ is not defined; the immediate is built
   as ((D)<<2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D)					\
  ((__m128d)__builtin_ia32_getmantsd_round (				\
     (__v2df)(__m128d)(X),						\
     (__v2df)(__m128d)(Y),						\
     (int)(((D)<<2) | (C)),						\
     _MM_FROUND_CUR_DIRECTION))
12799
/* Macro form used when __OPTIMIZE__ is not defined; the immediate is built
   as ((D)<<2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D)					\
  ((__m128)__builtin_ia32_getmantss_round (				\
     (__v4sf)(__m128)(X),						\
     (__v4sf)(__m128)(Y),						\
     (int)(((D)<<2) | (C)),						\
     _MM_FROUND_CUR_DIRECTION))
12805
/* Macro form of _mm_getexp_ss used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_getexpss128_round, the same three-operand
   builtin that the inline definition of _mm_getexp_ss calls, so both
   forms of the intrinsic resolve to the same builtin.  */
#define _mm_getexp_ss(A, B)						\
  ((__m128)__builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),	\
					     (__v4sf)(__m128)(B),	\
					     _MM_FROUND_CUR_DIRECTION))
12809
/* Macro form of _mm_getexp_sd used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_getexpsd128_round, the same three-operand
   builtin that the inline definition of _mm_getexp_sd calls, so both
   forms of the intrinsic resolve to the same builtin.  */
#define _mm_getexp_sd(A, B)						\
  ((__m128d)__builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),	\
					      (__v2df)(__m128d)(B),	\
					      _MM_FROUND_CUR_DIRECTION))
12813
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_getexp_ps(A)						\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
					   (__v16sf)_mm512_undefined_ps(), \
					   (__mmask16)-1,		\
					   _MM_FROUND_CUR_DIRECTION))
756c5857
AI
12817
/* Macro form used when __OPTIMIZE__ is not defined; W is the second builtin
   operand and U the mask.  */
#define _mm512_mask_getexp_ps(W, U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
					   (__v16sf)(__m512)(W),	\
					   (__mmask16)(U),		\
					   _MM_FROUND_CUR_DIRECTION))
12821
/* Macro form used when __OPTIMIZE__ is not defined; a zero vector is the
   second builtin operand and U the mask.  */
#define _mm512_maskz_getexp_ps(U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
					   (__v16sf)_mm512_setzero_ps(), \
					   (__mmask16)(U),		\
					   _MM_FROUND_CUR_DIRECTION))
12825
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_getexp_pd(A)						\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
					    (__v8df)_mm512_undefined_pd(), \
					    (__mmask8)-1,		\
					    _MM_FROUND_CUR_DIRECTION))
756c5857
AI
12829
/* Macro form used when __OPTIMIZE__ is not defined; W is the second builtin
   operand and U the mask.  */
#define _mm512_mask_getexp_pd(W, U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
					    (__v8df)(__m512d)(W),	\
					    (__mmask8)(U),		\
					    _MM_FROUND_CUR_DIRECTION))
12833
/* Macro form used when __OPTIMIZE__ is not defined; a zero vector is the
   second builtin operand and U the mask.  */
#define _mm512_maskz_getexp_pd(U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
					    (__v8df)_mm512_setzero_pd(), \
					    (__mmask8)(U),		\
					    _MM_FROUND_CUR_DIRECTION))
12837#endif
12838
12839#ifdef __OPTIMIZE__
12840extern __inline __m512
12841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12842_mm512_roundscale_ps (__m512 __A, const int __imm)
12843{
12844 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
12845 (__v16sf)
12846 _mm512_undefined_ps (),
12847 -1,
756c5857
AI
12848 _MM_FROUND_CUR_DIRECTION);
12849}
12850
12851extern __inline __m512
12852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12853_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12854 const int __imm)
12855{
12856 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12857 (__v16sf) __A,
12858 (__mmask16) __B,
12859 _MM_FROUND_CUR_DIRECTION);
12860}
12861
12862extern __inline __m512
12863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12864_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12865{
12866 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12867 __imm,
12868 (__v16sf)
12869 _mm512_setzero_ps (),
12870 (__mmask16) __A,
12871 _MM_FROUND_CUR_DIRECTION);
12872}
12873
12874extern __inline __m512d
12875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12876_mm512_roundscale_pd (__m512d __A, const int __imm)
12877{
12878 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
12879 (__v8df)
12880 _mm512_undefined_pd (),
12881 -1,
756c5857
AI
12882 _MM_FROUND_CUR_DIRECTION);
12883}
12884
12885extern __inline __m512d
12886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12887_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12888 const int __imm)
12889{
12890 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12891 (__v8df) __A,
12892 (__mmask8) __B,
12893 _MM_FROUND_CUR_DIRECTION);
12894}
12895
12896extern __inline __m512d
12897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12898_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12899{
12900 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12901 __imm,
12902 (__v8df)
12903 _mm512_setzero_pd (),
12904 (__mmask8) __A,
12905 _MM_FROUND_CUR_DIRECTION);
12906}
12907
075691af
AI
12908extern __inline __m128
12909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12910_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12911{
12912 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12913 (__v4sf) __B, __imm,
12914 _MM_FROUND_CUR_DIRECTION);
12915}
12916
12917extern __inline __m128d
12918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12920{
12921 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12922 (__v2df) __B, __imm,
12923 _MM_FROUND_CUR_DIRECTION);
12924}
12925
756c5857
AI
12926#else
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_roundscale_ps(A, B)					\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),	\
					    (int)(B),			\
					    (__v16sf)_mm512_undefined_ps(), \
					    (__mmask16)(-1),		\
					    _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined; A is the third builtin
   operand, B the mask, C the input vector and D the immediate.  */
#define _mm512_mask_roundscale_ps(A, B, C, D)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B),		\
					    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined; A is the mask, B the
   input vector and C the immediate.  */
#define _mm512_maskz_roundscale_ps(A, B, C)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(), \
					    (__mmask16)(A),		\
					    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_roundscale_pd(A, B)					\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),	\
					     (int)(B),			\
					     (__v8df)_mm512_undefined_pd(), \
					     (__mmask8)(-1),		\
					     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined; A is the third builtin
   operand, B the mask, C the input vector and D the immediate.  */
#define _mm512_mask_roundscale_pd(A, B, C, D)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B),		\
					     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined; A is the mask, B the
   input vector and C the immediate.  */
#define _mm512_maskz_roundscale_pd(A, B, C)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(), \
					     (__mmask8)(A),		\
					     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro form used when __OPTIMIZE__ is not defined; C is the immediate.  */
#define _mm_roundscale_ss(A, B, C)					\
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
					     (__v4sf)(__m128)(B),	\
					     (int)(C),			\
					     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined; C is the immediate.  */
#define _mm_roundscale_sd(A, B, C)					\
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
					      (__v2df)(__m128d)(B),	\
					      (int)(C),			\
					      _MM_FROUND_CUR_DIRECTION))
756c5857
AI
12959#endif
12960
12961#ifdef __OPTIMIZE__
12962extern __inline __mmask8
12963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12965{
12966 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12967 (__v8df) __Y, __P,
12968 (__mmask8) -1,
12969 _MM_FROUND_CUR_DIRECTION);
12970}
12971
12972extern __inline __mmask16
12973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12975{
12976 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12977 (__v16sf) __Y, __P,
12978 (__mmask16) -1,
12979 _MM_FROUND_CUR_DIRECTION);
12980}
12981
12982extern __inline __mmask16
12983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12984_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12985{
12986 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12987 (__v16sf) __Y, __P,
12988 (__mmask16) __U,
12989 _MM_FROUND_CUR_DIRECTION);
12990}
12991
12992extern __inline __mmask8
12993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12994_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12995{
12996 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12997 (__v8df) __Y, __P,
12998 (__mmask8) __U,
12999 _MM_FROUND_CUR_DIRECTION);
13000}
13001
13002extern __inline __mmask8
13003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13004_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
13005{
13006 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13007 (__v2df) __Y, __P,
13008 (__mmask8) -1,
13009 _MM_FROUND_CUR_DIRECTION);
13010}
13011
13012extern __inline __mmask8
13013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13014_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
13015{
13016 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13017 (__v2df) __Y, __P,
13018 (__mmask8) __M,
13019 _MM_FROUND_CUR_DIRECTION);
13020}
13021
13022extern __inline __mmask8
13023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13024_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
13025{
13026 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13027 (__v4sf) __Y, __P,
13028 (__mmask8) -1,
13029 _MM_FROUND_CUR_DIRECTION);
13030}
13031
13032extern __inline __mmask8
13033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13034_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
13035{
13036 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13037 (__v4sf) __Y, __P,
13038 (__mmask8) __M,
13039 _MM_FROUND_CUR_DIRECTION);
13040}
13041
13042#else
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_cmp_pd_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y),	\
					    (int)(P),			\
					    (__mmask8)-1,		\
					    _MM_FROUND_CUR_DIRECTION))
13047
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm512_cmp_ps_mask(X, Y, P)					\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y),	\
					     (int)(P),			\
					     (__mmask16)-1,		\
					     _MM_FROUND_CUR_DIRECTION))
13052
/* Macro form used when __OPTIMIZE__ is not defined.  The mask argument M
   is parenthesized before the (__mmask8) cast so that an expression
   argument (for example `a | b') is converted as a whole, matching the
   inline definition, which casts its complete mask parameter.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y),	\
					    (int)(P),			\
					    (__mmask8)(M),		\
					    _MM_FROUND_CUR_DIRECTION))
13057
/* Macro form used when __OPTIMIZE__ is not defined.  The mask argument M
   is parenthesized before the (__mmask16) cast so that an expression
   argument (for example `a | b') is converted as a whole, matching the
   inline definition, which casts its complete mask parameter.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P)				\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y),	\
					     (int)(P),			\
					     (__mmask16)(M),		\
					     _MM_FROUND_CUR_DIRECTION))
13062
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm_cmp_sd_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y),		\
					 (int)(P),			\
					 (__mmask8)-1,			\
					 _MM_FROUND_CUR_DIRECTION))
13067
/* Macro form used when __OPTIMIZE__ is not defined.  The mask argument M
   is parenthesized and cast to __mmask8, as the inline definition does
   with its mask parameter, so an expression argument cannot be split by
   the surrounding argument list.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y),		\
					 (int)(P),			\
					 (__mmask8)(M),			\
					 _MM_FROUND_CUR_DIRECTION))
13072
/* Macro form used when __OPTIMIZE__ is not defined; all mask bits set.  */
#define _mm_cmp_ss_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y),		\
					 (int)(P),			\
					 (__mmask8)-1,			\
					 _MM_FROUND_CUR_DIRECTION))
13077
/* Macro form used when __OPTIMIZE__ is not defined.  The mask argument M
   is parenthesized and cast to __mmask8, as the inline definition does
   with its mask parameter, so an expression argument cannot be split by
   the surrounding argument list.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y),		\
					 (int)(P),			\
					 (__mmask8)(M),			\
					 _MM_FROUND_CUR_DIRECTION))
13082#endif
13083
2196a885
KY
13084extern __inline __mmask16
13085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13086_mm512_kmov (__mmask16 __A)
13087{
7cdb6e4c 13088 return __builtin_ia32_kmovw (__A);
2196a885
KY
13089}
13090
275be1da
IT
13091extern __inline __m512
13092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13093_mm512_castpd_ps (__m512d __A)
13094{
13095 return (__m512) (__A);
13096}
13097
13098extern __inline __m512i
13099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13100_mm512_castpd_si512 (__m512d __A)
13101{
13102 return (__m512i) (__A);
13103}
13104
13105extern __inline __m512d
13106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13107_mm512_castps_pd (__m512 __A)
13108{
13109 return (__m512d) (__A);
13110}
13111
13112extern __inline __m512i
13113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114_mm512_castps_si512 (__m512 __A)
13115{
13116 return (__m512i) (__A);
13117}
13118
13119extern __inline __m512
13120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13121_mm512_castsi512_ps (__m512i __A)
13122{
13123 return (__m512) (__A);
13124}
13125
13126extern __inline __m512d
13127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13128_mm512_castsi512_pd (__m512i __A)
13129{
13130 return (__m512d) (__A);
13131}
13132
13133extern __inline __m128d
13134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13135_mm512_castpd512_pd128 (__m512d __A)
13136{
13137 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13138}
13139
13140extern __inline __m128
13141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13142_mm512_castps512_ps128 (__m512 __A)
13143{
13144 return _mm512_extractf32x4_ps(__A, 0);
13145}
13146
13147extern __inline __m128i
13148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13149_mm512_castsi512_si128 (__m512i __A)
13150{
13151 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13152}
13153
13154extern __inline __m256d
13155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13156_mm512_castpd512_pd256 (__m512d __A)
13157{
13158 return _mm512_extractf64x4_pd(__A, 0);
13159}
13160
13161extern __inline __m256
13162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13163_mm512_castps512_ps256 (__m512 __A)
13164{
13165 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13166}
13167
13168extern __inline __m256i
13169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13170_mm512_castsi512_si256 (__m512i __A)
13171{
13172 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13173}
13174
13175extern __inline __m512d
13176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13177_mm512_castpd128_pd512 (__m128d __A)
13178{
13179 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13180}
13181
13182extern __inline __m512
13183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13184_mm512_castps128_ps512 (__m128 __A)
13185{
13186 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13187}
13188
13189extern __inline __m512i
13190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13191_mm512_castsi128_si512 (__m128i __A)
13192{
13193 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13194}
13195
13196extern __inline __m512d
13197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13198_mm512_castpd256_pd512 (__m256d __A)
13199{
13200 return __builtin_ia32_pd512_256pd (__A);
13201}
13202
13203extern __inline __m512
13204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13205_mm512_castps256_ps512 (__m256 __A)
13206{
13207 return __builtin_ia32_ps512_256ps (__A);
13208}
13209
13210extern __inline __m512i
13211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13212_mm512_castsi256_si512 (__m256i __A)
13213{
13214 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13215}
13216
13217extern __inline __mmask16
13218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13219_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13220{
13221 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13222 (__v16si) __B, 0,
13223 (__mmask16) -1);
13224}
13225
13226extern __inline __mmask16
13227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13228_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13229{
13230 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13231 (__v16si) __B, 0, __U);
13232}
13233
13234extern __inline __mmask8
13235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13236_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13237{
13238 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13239 (__v8di) __B, 0, __U);
13240}
13241
13242extern __inline __mmask8
13243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13244_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13245{
13246 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13247 (__v8di) __B, 0,
13248 (__mmask8) -1);
13249}
13250
13251extern __inline __mmask16
13252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13253_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13254{
13255 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13256 (__v16si) __B, 6,
13257 (__mmask16) -1);
13258}
13259
13260extern __inline __mmask16
13261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13262_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13263{
13264 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13265 (__v16si) __B, 6, __U);
13266}
13267
13268extern __inline __mmask8
13269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13270_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13271{
13272 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13273 (__v8di) __B, 6, __U);
13274}
13275
13276extern __inline __mmask8
13277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13279{
13280 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13281 (__v8di) __B, 6,
13282 (__mmask8) -1);
13283}
13284
756c5857
AI
13285#ifdef __DISABLE_AVX512F__
13286#undef __DISABLE_AVX512F__
13287#pragma GCC pop_options
13288#endif /* __DISABLE_AVX512F__ */
13289
13290#endif /* _AVX512FINTRIN_H_INCLUDED */