]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
Remove old rounding code
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
cbe34bb5 1/* Copyright (C) 2013-2017 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal 64-byte vector types used to implement the intrinsics:
   the two floating-point layouts first, then one signed/unsigned
   pair per integer element width.  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));

typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));

typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 512-bit vector types.  They carry __may_alias__ because the
   Intel API lets these objects alias other vector types as well as
   their scalar components.  */
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Byte-aligned (__aligned__ (1)) counterparts of the public 512-bit
   vector types.  */
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Mask types: one bit per vector element, 16 or 8 bits wide.  */
typedef unsigned short __mmask16;
typedef unsigned char __mmask8;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
100extern __inline __m512d
101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
104{
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
107}
108
109extern __inline __m512
110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111_mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
115{
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
119}
120
/* "Reversed" setters: the arguments are given with element 0 first and
   are forwarded to the matching _mm512_set_* function in the opposite
   order.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)                \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7,                \
                          e8, e9, e10, e11, e12, e13, e14, e15)          \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8,                \
                    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7)                   \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7,                   \
                       e8, e9, e10, e11, e12, e13, e14, e15)             \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8,                   \
                 e7, e6, e5, e4, e3, e2, e1, e0)
133
0b192937
UD
134extern __inline __m512
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm512_undefined_ps (void)
137{
138 __m512 __Y = __Y;
139 return __Y;
140}
141
dcb2c527
JJ
/* Type-less spelling; an alias for the float variant.  */
#define _mm512_undefined        _mm512_undefined_ps
143
0b192937
UD
144extern __inline __m512d
145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146_mm512_undefined_pd (void)
147{
148 __m512d __Y = __Y;
149 return __Y;
150}
151
152extern __inline __m512i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 154_mm512_undefined_epi32 (void)
0b192937
UD
155{
156 __m512i __Y = __Y;
157 return __Y;
158}
159
4271e5cb
UB
/* Whole-register spelling; an alias for the epi32 variant.  */
#define _mm512_undefined_si512  _mm512_undefined_epi32
161
7d9088c2
UD
162extern __inline __m512i
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm512_set1_epi8 (char __A)
165{
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
175}
176
177extern __inline __m512i
178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179_mm512_set1_epi16 (short __A)
180{
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
186}
187
2b2384e8
UD
188extern __inline __m512d
189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190_mm512_set1_pd (double __A)
191{
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
197}
198
199extern __inline __m512
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm512_set1_ps (float __A)
202{
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
208}
209
7d9088c2
UD
210/* Create the vector [A B C D A B C D A B C D A B C D]. */
211extern __inline __m512i
212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
214{
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
218}
219
220extern __inline __m512i
221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222_mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
224{
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
227}
228
229extern __inline __m512d
230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231_mm512_set4_pd (double __A, double __B, double __C, double __D)
232{
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
235}
236
237extern __inline __m512
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm512_set4_ps (float __A, float __B, float __C, float __D)
240{
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
244}
245
/* "Reversed" four-value setters: forward to the matching _mm512_set4_*
   function with the argument order flipped.  */
#define _mm512_setr4_epi64(e0, e1, e2, e3)                               \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0, e1, e2, e3)                               \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0, e1, e2, e3)                                  \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0, e1, e2, e3)                                  \
  _mm512_set4_ps (e3, e2, e1, e0)
257
756c5857
AI
258extern __inline __m512
259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260_mm512_setzero_ps (void)
261{
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
264}
265
266extern __inline __m512d
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm512_setzero_pd (void)
269{
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
271}
272
7d9088c2
UD
273extern __inline __m512i
274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275_mm512_setzero_epi32 (void)
276{
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
278}
279
756c5857
AI
280extern __inline __m512i
281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282_mm512_setzero_si512 (void)
283{
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
285}
286
287extern __inline __m512d
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
290{
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
294}
295
296extern __inline __m512d
297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
299{
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
304}
305
306extern __inline __m512
307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
309{
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
313}
314
315extern __inline __m512
316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
318{
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
323}
324
325extern __inline __m512d
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_load_pd (void const *__P)
328{
329 return *(__m512d *) __P;
330}
331
332extern __inline __m512d
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
335{
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
339}
340
341extern __inline __m512d
342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
344{
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
349}
350
351extern __inline void
352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353_mm512_store_pd (void *__P, __m512d __A)
354{
355 *(__m512d *) __P = __A;
356}
357
358extern __inline void
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
361{
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
364}
365
366extern __inline __m512
367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368_mm512_load_ps (void const *__P)
369{
370 return *(__m512 *) __P;
371}
372
373extern __inline __m512
374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
376{
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
380}
381
382extern __inline __m512
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
385{
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
390}
391
392extern __inline void
393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394_mm512_store_ps (void *__P, __m512 __A)
395{
396 *(__m512 *) __P = __A;
397}
398
399extern __inline void
400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
402{
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
405}
406
407extern __inline __m512i
408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
410{
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
414}
415
416extern __inline __m512i
417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
419{
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
424}
425
426extern __inline __m512i
427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428_mm512_load_epi64 (void const *__P)
429{
430 return *(__m512i *) __P;
431}
432
433extern __inline __m512i
434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
436{
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
440}
441
442extern __inline __m512i
443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
445{
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
450}
451
452extern __inline void
453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454_mm512_store_epi64 (void *__P, __m512i __A)
455{
456 *(__m512i *) __P = __A;
457}
458
459extern __inline void
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
462{
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
465}
466
467extern __inline __m512i
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
470{
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
474}
475
476extern __inline __m512i
477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
479{
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
484}
485
486extern __inline __m512i
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_load_si512 (void const *__P)
489{
490 return *(__m512i *) __P;
491}
492
493extern __inline __m512i
494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495_mm512_load_epi32 (void const *__P)
496{
497 return *(__m512i *) __P;
498}
499
500extern __inline __m512i
501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
503{
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
507}
508
509extern __inline __m512i
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
512{
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
517}
518
519extern __inline void
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm512_store_si512 (void *__P, __m512i __A)
522{
523 *(__m512i *) __P = __A;
524}
525
526extern __inline void
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm512_store_epi32 (void *__P, __m512i __A)
529{
530 *(__m512i *) __P = __A;
531}
532
533extern __inline void
534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
536{
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
539}
540
541extern __inline __m512i
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm512_mullo_epi32 (__m512i __A, __m512i __B)
544{
2069d6fc 545 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
546}
547
548extern __inline __m512i
549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
551{
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
557}
558
559extern __inline __m512i
560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
562{
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
566}
567
568extern __inline __m512i
569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
571{
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
4271e5cb 575 _mm512_undefined_epi32 (),
756c5857
AI
576 (__mmask16) -1);
577}
578
579extern __inline __m512i
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
582{
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
587}
588
589extern __inline __m512i
590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
592{
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
598}
599
600extern __inline __m512i
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm512_srav_epi32 (__m512i __X, __m512i __Y)
603{
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
4271e5cb 607 _mm512_undefined_epi32 (),
756c5857
AI
608 (__mmask16) -1);
609}
610
611extern __inline __m512i
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
614{
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
619}
620
621extern __inline __m512i
622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
624{
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
630}
631
632extern __inline __m512i
633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
635{
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
4271e5cb 639 _mm512_undefined_epi32 (),
756c5857
AI
640 (__mmask16) -1);
641}
642
643extern __inline __m512i
644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
646{
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
651}
652
653extern __inline __m512i
654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
656{
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
662}
663
664extern __inline __m512i
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm512_add_epi64 (__m512i __A, __m512i __B)
667{
2069d6fc 668 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
669}
670
671extern __inline __m512i
672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
674{
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
679}
680
681extern __inline __m512i
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
684{
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
690}
691
692extern __inline __m512i
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm512_sub_epi64 (__m512i __A, __m512i __B)
695{
2069d6fc 696 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
697}
698
699extern __inline __m512i
700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
712{
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
718}
719
720extern __inline __m512i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
723{
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
0b192937 727 _mm512_undefined_pd (),
756c5857
AI
728 (__mmask8) -1);
729}
730
731extern __inline __m512i
732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
734{
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
739}
740
741extern __inline __m512i
742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
744{
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
750}
751
752extern __inline __m512i
753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754_mm512_srav_epi64 (__m512i __X, __m512i __Y)
755{
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
4271e5cb 759 _mm512_undefined_epi32 (),
756c5857
AI
760 (__mmask8) -1);
761}
762
763extern __inline __m512i
764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
766{
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
771}
772
773extern __inline __m512i
774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
776{
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
782}
783
784extern __inline __m512i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
787{
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
4271e5cb 791 _mm512_undefined_epi32 (),
756c5857
AI
792 (__mmask8) -1);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
798{
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
803}
804
805extern __inline __m512i
806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
808{
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
814}
815
816extern __inline __m512i
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm512_add_epi32 (__m512i __A, __m512i __B)
819{
2069d6fc 820 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
821}
822
823extern __inline __m512i
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
826{
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
831}
832
833extern __inline __m512i
834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
836{
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
842}
843
844extern __inline __m512i
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm512_mul_epi32 (__m512i __X, __m512i __Y)
847{
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
4271e5cb 851 _mm512_undefined_epi32 (),
756c5857
AI
852 (__mmask8) -1);
853}
854
855extern __inline __m512i
856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
858{
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
862}
863
864extern __inline __m512i
865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
867{
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
873}
874
875extern __inline __m512i
876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877_mm512_sub_epi32 (__m512i __A, __m512i __B)
878{
2069d6fc 879 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
880}
881
882extern __inline __m512i
883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
885{
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
890}
891
892extern __inline __m512i
893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
895{
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
901}
902
903extern __inline __m512i
904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905_mm512_mul_epu32 (__m512i __X, __m512i __Y)
906{
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
4271e5cb 910 _mm512_undefined_epi32 (),
756c5857
AI
911 (__mmask8) -1);
912}
913
914extern __inline __m512i
915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
917{
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
921}
922
923extern __inline __m512i
924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
926{
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
932}
933
934#ifdef __OPTIMIZE__
935extern __inline __m512i
936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937_mm512_slli_epi64 (__m512i __A, unsigned int __B)
938{
939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 (__v8di)
4271e5cb 941 _mm512_undefined_epi32 (),
756c5857
AI
942 (__mmask8) -1);
943}
944
945extern __inline __m512i
946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948 unsigned int __B)
949{
950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951 (__v8di) __W,
952 (__mmask8) __U);
953}
954
955extern __inline __m512i
956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
958{
959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960 (__v8di)
961 _mm512_setzero_si512 (),
962 (__mmask8) __U);
963}
964#else
/* Macro forms used when __OPTIMIZE__ is not defined.  The count C is
   written straight into the builtin call (after a cast to int) instead
   of travelling through a function parameter.  */
#define _mm512_slli_epi64(X, C)                                          \
  ((__m512i) __builtin_ia32_psllqi512_mask (                             \
     (__v8di) (__m512i) (X), (int) (C),                                  \
     (__v8di) (__m512i) _mm512_undefined_epi32 (),                       \
     (__mmask8) -1))

#define _mm512_mask_slli_epi64(W, U, X, C)                               \
  ((__m512i) __builtin_ia32_psllqi512_mask (                             \
     (__v8di) (__m512i) (X), (int) (C),                                  \
     (__v8di) (__m512i) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_slli_epi64(U, X, C)                                 \
  ((__m512i) __builtin_ia32_psllqi512_mask (                             \
     (__v8di) (__m512i) (X), (int) (C),                                  \
     (__v8di) (__m512i) _mm512_setzero_si512 (),                         \
     (__mmask8) (U)))
979#endif
980
981extern __inline __m512i
982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983_mm512_sll_epi64 (__m512i __A, __m128i __B)
984{
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
4271e5cb 988 _mm512_undefined_epi32 (),
756c5857
AI
989 (__mmask8) -1);
990}
991
992extern __inline __m512i
993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
995{
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1000}
1001
1002extern __inline __m512i
1003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1005{
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1011}
1012
/* _mm512_srli_epi64 family: wrappers around __builtin_ia32_psrlqi512_mask
   that take the shift count as a scalar.  They are always-inline functions
   when __OPTIMIZE__ is defined and macros otherwise (presumably so the
   count still reaches the builtin as a literal constant).  */
#ifdef __OPTIMIZE__
/* Plain form: all-ones mask, undefined vector as the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-masked form: an all-zero vector is the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
#endif
1059
1060extern __inline __m512i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm512_srl_epi64 (__m512i __A, __m128i __B)
1063{
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
4271e5cb 1067 _mm512_undefined_epi32 (),
756c5857
AI
1068 (__mmask8) -1);
1069}
1070
1071extern __inline __m512i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1074{
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1079}
1080
1081extern __inline __m512i
1082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1084{
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1090}
1091
/* _mm512_srai_epi64 family: wrappers around __builtin_ia32_psraqi512_mask
   that take the shift count as a scalar.  They are always-inline functions
   when __OPTIMIZE__ is defined and macros otherwise (presumably so the
   count still reaches the builtin as a literal constant).  */
#ifdef __OPTIMIZE__
/* Plain form: all-ones mask, undefined vector as the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}

/* Zero-masked form: an all-zero vector is the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(C), \
                                            (__v8di)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
#endif
1138
1139extern __inline __m512i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm512_sra_epi64 (__m512i __A, __m128i __B)
1142{
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
4271e5cb 1146 _mm512_undefined_epi32 (),
756c5857
AI
1147 (__mmask8) -1);
1148}
1149
1150extern __inline __m512i
1151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1153{
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1158}
1159
1160extern __inline __m512i
1161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1163{
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1169}
1170
/* _mm512_slli_epi32 family: wrappers around __builtin_ia32_pslldi512_mask
   that take the shift count as a scalar.  They are always-inline functions
   when __OPTIMIZE__ is defined and macros otherwise (presumably so the
   count still reaches the builtin as a literal constant).  */
#ifdef __OPTIMIZE__
/* Plain form: all-ones mask, undefined vector as the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-masked form: an all-zero vector is the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#endif
1217
1218extern __inline __m512i
1219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220_mm512_sll_epi32 (__m512i __A, __m128i __B)
1221{
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
4271e5cb 1225 _mm512_undefined_epi32 (),
756c5857
AI
1226 (__mmask16) -1);
1227}
1228
1229extern __inline __m512i
1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1232{
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1237}
1238
1239extern __inline __m512i
1240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1242{
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1248}
1249
/* _mm512_srli_epi32 family: wrappers around __builtin_ia32_psrldi512_mask
   that take the shift count as a scalar.  They are always-inline functions
   when __OPTIMIZE__ is defined and macros otherwise (presumably so the
   count still reaches the builtin as a literal constant).  */
#ifdef __OPTIMIZE__
/* Plain form: all-ones mask, undefined vector as the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-masked form: an all-zero vector is the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#endif
1296
1297extern __inline __m512i
1298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299_mm512_srl_epi32 (__m512i __A, __m128i __B)
1300{
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
4271e5cb 1304 _mm512_undefined_epi32 (),
756c5857
AI
1305 (__mmask16) -1);
1306}
1307
1308extern __inline __m512i
1309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1311{
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1316}
1317
1318extern __inline __m512i
1319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1321{
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1327}
1328
/* _mm512_srai_epi32 family: wrappers around __builtin_ia32_psradi512_mask
   that take the shift count as a scalar.  They are always-inline functions
   when __OPTIMIZE__ is defined and macros otherwise (presumably so the
   count still reaches the builtin as a literal constant).  */
#ifdef __OPTIMIZE__
/* Plain form: all-ones mask, undefined vector as the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* Zero-masked form: an all-zero vector is the third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
                                            (int)(C), \
                                            (__v16si)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#endif
1375
1376extern __inline __m512i
1377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378_mm512_sra_epi32 (__m512i __A, __m128i __B)
1379{
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
4271e5cb 1383 _mm512_undefined_epi32 (),
756c5857
AI
1384 (__mmask16) -1);
1385}
1386
1387extern __inline __m512i
1388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1390{
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1395}
1396
1397extern __inline __m512i
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1400{
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1406}
1407
075691af
AI
1408#ifdef __OPTIMIZE__
1409extern __inline __m128d
1410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1412{
1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 __R);
1416}
1417
1853f5c7
SP
1418extern __inline __m128d
1419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1421 __m128d __B, const int __R)
1422{
1423 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1424 (__v2df) __B,
1425 (__v2df) __W,
1426 (__mmask8) __U, __R);
1427}
1428
1429extern __inline __m128d
1430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1432 const int __R)
1433{
1434 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1435 (__v2df) __B,
1436 (__v2df)
1437 _mm_setzero_pd (),
1438 (__mmask8) __U, __R);
1439}
1440
075691af
AI
1441extern __inline __m128
1442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1444{
1445 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1446 (__v4sf) __B,
1447 __R);
1448}
1449
1853f5c7
SP
1450extern __inline __m128
1451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1453 __m128 __B, const int __R)
1454{
1455 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1456 (__v4sf) __B,
1457 (__v4sf) __W,
1458 (__mmask8) __U, __R);
1459}
1460
1461extern __inline __m128
1462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1464 const int __R)
1465{
1466 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1467 (__v4sf) __B,
1468 (__v4sf)
1469 _mm_setzero_ps (),
1470 (__mmask8) __U, __R);
1471}
1472
075691af
AI
1473extern __inline __m128d
1474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1476{
1477 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1478 (__v2df) __B,
1479 __R);
1480}
1481
1853f5c7
SP
1482extern __inline __m128d
1483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1485 __m128d __B, const int __R)
1486{
1487 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1488 (__v2df) __B,
1489 (__v2df) __W,
1490 (__mmask8) __U, __R);
1491}
1492
1493extern __inline __m128d
1494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1496 const int __R)
1497{
1498 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1499 (__v2df) __B,
1500 (__v2df)
1501 _mm_setzero_pd (),
1502 (__mmask8) __U, __R);
1503}
1504
075691af
AI
1505extern __inline __m128
1506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1507_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1508{
1509 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1510 (__v4sf) __B,
1511 __R);
1512}
1513
1853f5c7
SP
1514extern __inline __m128
1515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1517 __m128 __B, const int __R)
1518{
1519 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1520 (__v4sf) __B,
1521 (__v4sf) __W,
1522 (__mmask8) __U, __R);
1523}
1524
1525extern __inline __m128
1526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1528 const int __R)
1529{
1530 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1531 (__v4sf) __B,
1532 (__v4sf)
1533 _mm_setzero_ps (),
1534 (__mmask8) __U, __R);
1535}
1536
075691af
AI
1537#else
/* Macro forms of the scalar add/sub rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in parentheses
   so that it behaves as a single operand (a postfix operator applied to
   the macro call would otherwise bind to the builtin call rather than to
   the cast result), and every argument is parenthesised and given the same
   cast that the inline definitions apply.

   A, B - vector operands; W - third (source) vector of the masked form;
   U - mask; C - rounding argument forwarded to the builtin.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), \
                                              (int)(C)))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (__mmask8)(U), \
                                              (int)(C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), \
                                             (int)(C)))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (__mmask8)(U), \
                                             (int)(C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), \
                                              (int)(C)))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (__mmask8)(U), \
                                              (int)(C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), \
                                             (int)(C)))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (__mmask8)(U), \
                                             (int)(C)))
1573
075691af
AI
1574#endif
1575
756c5857
AI
1576#ifdef __OPTIMIZE__
1577extern __inline __m512i
1578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1579_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1580 const int __imm)
756c5857
AI
1581{
1582 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1583 (__v8di) __B,
b5fd0b71 1584 (__v8di) __C, __imm,
756c5857
AI
1585 (__mmask8) -1);
1586}
1587
1588extern __inline __m512i
1589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
b5fd0b71 1591 __m512i __C, const int __imm)
756c5857
AI
1592{
1593 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1594 (__v8di) __B,
b5fd0b71 1595 (__v8di) __C, __imm,
756c5857
AI
1596 (__mmask8) __U);
1597}
1598
1599extern __inline __m512i
1600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
b5fd0b71 1602 __m512i __C, const int __imm)
756c5857
AI
1603{
1604 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1605 (__v8di) __B,
1606 (__v8di) __C,
b5fd0b71 1607 __imm, (__mmask8) __U);
756c5857
AI
1608}
1609
1610extern __inline __m512i
1611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71
JJ
1612_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1613 const int __imm)
756c5857
AI
1614{
1615 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1616 (__v16si) __B,
1617 (__v16si) __C,
b5fd0b71 1618 __imm, (__mmask16) -1);
756c5857
AI
1619}
1620
1621extern __inline __m512i
1622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1623_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
b5fd0b71 1624 __m512i __C, const int __imm)
756c5857
AI
1625{
1626 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1627 (__v16si) __B,
1628 (__v16si) __C,
b5fd0b71 1629 __imm, (__mmask16) __U);
756c5857
AI
1630}
1631
1632extern __inline __m512i
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
b5fd0b71 1635 __m512i __C, const int __imm)
756c5857
AI
1636{
1637 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1638 (__v16si) __B,
1639 (__v16si) __C,
b5fd0b71 1640 __imm, (__mmask16) __U);
756c5857
AI
1641}
1642#else
/* Macro forms of the ternary-logic intrinsics, used when __OPTIMIZE__ is
   not defined.  A, B, C are the vector operands, I the immediate and U
   the mask; the maskz variants call the separate _maskz builtins.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
                                               (__v8di)(__m512i)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(I), \
                                               (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
                                               (__v8di)(__m512i)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(I), \
                                               (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
                                                (__v8di)(__m512i)(B), \
                                                (__v8di)(__m512i)(C), \
                                                (int)(I), \
                                                (__mmask8)(U)))

#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
                                               (__v16si)(__m512i)(B), \
                                               (__v16si)(__m512i)(C), \
                                               (int)(I), \
                                               (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
                                               (__v16si)(__m512i)(B), \
                                               (__v16si)(__m512i)(C), \
                                               (int)(I), \
                                               (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
                                                (__v16si)(__m512i)(B), \
                                                (__v16si)(__m512i)(C), \
                                                (int)(I), \
                                                (__mmask16)(U)))
1664#endif
1665
1666extern __inline __m512d
1667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668_mm512_rcp14_pd (__m512d __A)
1669{
1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671 (__v8df)
0b192937 1672 _mm512_undefined_pd (),
756c5857
AI
1673 (__mmask8) -1);
1674}
1675
1676extern __inline __m512d
1677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1679{
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1683}
1684
1685extern __inline __m512d
1686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1688{
1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690 (__v8df)
1691 _mm512_setzero_pd (),
1692 (__mmask8) __U);
1693}
1694
1695extern __inline __m512
1696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697_mm512_rcp14_ps (__m512 __A)
1698{
1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700 (__v16sf)
0b192937 1701 _mm512_undefined_ps (),
756c5857
AI
1702 (__mmask16) -1);
1703}
1704
1705extern __inline __m512
1706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1708{
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf) __W,
1711 (__mmask16) __U);
1712}
1713
1714extern __inline __m512
1715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1717{
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf)
1720 _mm512_setzero_ps (),
1721 (__mmask16) __U);
1722}
1723
075691af
AI
1724extern __inline __m128d
1725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726_mm_rcp14_sd (__m128d __A, __m128d __B)
1727{
df62b4af
IT
1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729 (__v2df) __A);
075691af
AI
1730}
1731
f4ee3a9e
UB
1732extern __inline __m128d
1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1735{
1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737 (__v2df) __A,
1738 (__v2df) __W,
1739 (__mmask8) __U);
1740}
1741
1742extern __inline __m128d
1743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1745{
1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747 (__v2df) __A,
1748 (__v2df) _mm_setzero_ps (),
1749 (__mmask8) __U);
1750}
1751
075691af
AI
1752extern __inline __m128
1753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754_mm_rcp14_ss (__m128 __A, __m128 __B)
1755{
df62b4af
IT
1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757 (__v4sf) __A);
075691af
AI
1758}
1759
f4ee3a9e
UB
1760extern __inline __m128
1761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1763{
1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765 (__v4sf) __A,
1766 (__v4sf) __W,
1767 (__mmask8) __U);
1768}
1769
1770extern __inline __m128
1771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1773{
1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775 (__v4sf) __A,
1776 (__v4sf) _mm_setzero_ps (),
1777 (__mmask8) __U);
1778}
1779
756c5857
AI
1780extern __inline __m512d
1781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782_mm512_rsqrt14_pd (__m512d __A)
1783{
1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785 (__v8df)
0b192937 1786 _mm512_undefined_pd (),
756c5857
AI
1787 (__mmask8) -1);
1788}
1789
1790extern __inline __m512d
1791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1793{
1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795 (__v8df) __W,
1796 (__mmask8) __U);
1797}
1798
1799extern __inline __m512d
1800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1802{
1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804 (__v8df)
1805 _mm512_setzero_pd (),
1806 (__mmask8) __U);
1807}
1808
1809extern __inline __m512
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm512_rsqrt14_ps (__m512 __A)
1812{
1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814 (__v16sf)
0b192937 1815 _mm512_undefined_ps (),
756c5857
AI
1816 (__mmask16) -1);
1817}
1818
1819extern __inline __m512
1820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822{
1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824 (__v16sf) __W,
1825 (__mmask16) __U);
1826}
1827
1828extern __inline __m512
1829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1831{
1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833 (__v16sf)
1834 _mm512_setzero_ps (),
1835 (__mmask16) __U);
1836}
1837
075691af
AI
1838extern __inline __m128d
1839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1841{
df62b4af
IT
1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843 (__v2df) __A);
075691af
AI
1844}
1845
d7a33a4c
JK
1846extern __inline __m128d
1847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1849{
1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851 (__v2df) __A,
1852 (__v2df) __W,
1853 (__mmask8) __U);
1854}
1855
1856extern __inline __m128d
1857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1859{
1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861 (__v2df) __A,
1862 (__v2df) _mm_setzero_pd (),
1863 (__mmask8) __U);
1864}
1865
075691af
AI
1866extern __inline __m128
1867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1869{
df62b4af
IT
1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871 (__v4sf) __A);
075691af
AI
1872}
1873
d7a33a4c
JK
1874extern __inline __m128
1875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1877{
1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879 (__v4sf) __A,
1880 (__v4sf) __W,
1881 (__mmask8) __U);
1882}
1883
1884extern __inline __m128
1885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1887{
1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889 (__v4sf) __A,
1890 (__v4sf) _mm_setzero_ps (),
1891 (__mmask8) __U);
1892}
1893
756c5857
AI
1894#ifdef __OPTIMIZE__
1895extern __inline __m512d
1896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897_mm512_sqrt_round_pd (__m512d __A, const int __R)
1898{
1899 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1900 (__v8df)
0b192937 1901 _mm512_undefined_pd (),
756c5857
AI
1902 (__mmask8) -1, __R);
1903}
1904
1905extern __inline __m512d
1906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1908 const int __R)
1909{
1910 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1911 (__v8df) __W,
1912 (__mmask8) __U, __R);
1913}
1914
1915extern __inline __m512d
1916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1918{
1919 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1920 (__v8df)
1921 _mm512_setzero_pd (),
1922 (__mmask8) __U, __R);
1923}
1924
1925extern __inline __m512
1926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927_mm512_sqrt_round_ps (__m512 __A, const int __R)
1928{
1929 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1930 (__v16sf)
0b192937 1931 _mm512_undefined_ps (),
756c5857
AI
1932 (__mmask16) -1, __R);
1933}
1934
1935extern __inline __m512
1936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1938{
1939 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1940 (__v16sf) __W,
1941 (__mmask16) __U, __R);
1942}
1943
1944extern __inline __m512
1945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1947{
1948 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1949 (__v16sf)
1950 _mm512_setzero_ps (),
1951 (__mmask16) __U, __R);
1952}
1953
075691af
AI
1954extern __inline __m128d
1955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1957{
1958 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1959 (__v2df) __A,
1960 __R);
1961}
1962
1963extern __inline __m128
1964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1965_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1966{
1967 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1968 (__v4sf) __A,
1969 __R);
1970}
756c5857
AI
1971#else
/* Macro form of _mm512_sqrt_round_pd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, \
      (__v8df) _mm512_undefined_pd (), -1, C))
756c5857
AI
1974
/* Macro form of _mm512_mask_sqrt_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, W, U, C))
1977
/* Macro form of _mm512_maskz_sqrt_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, \
      (__v8df) _mm512_setzero_pd (), U, C))
1980
/* Macro form of _mm512_sqrt_round_ps, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, \
      (__v16sf) _mm512_undefined_ps (), -1, C))
756c5857
AI
1983
/* Macro form of _mm512_mask_sqrt_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, W, U, C))
1986
/* Macro form of _mm512_maskz_sqrt_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, \
      (__v16sf) _mm512_setzero_ps (), U, C))
075691af
AI
1989
/* Macro form of _mm_sqrt_round_sd, used when __OPTIMIZE__ is not defined.
   B is passed as the builtin's first operand and A as its second, the same
   order the inline definition uses, so both forms compute the same result.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round (B, A, C))
1992
/* Macro form of _mm_sqrt_round_ss, used when __OPTIMIZE__ is not defined.
   B is passed as the builtin's first operand and A as its second, the same
   order the inline definition uses, so both forms compute the same result.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round (B, A, C))
756c5857
AI
1995#endif
1996
1997extern __inline __m512i
1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999_mm512_cvtepi8_epi32 (__m128i __A)
2000{
2001 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2002 (__v16si)
4271e5cb 2003 _mm512_undefined_epi32 (),
756c5857
AI
2004 (__mmask16) -1);
2005}
2006
2007extern __inline __m512i
2008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2010{
2011 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2012 (__v16si) __W,
2013 (__mmask16) __U);
2014}
2015
2016extern __inline __m512i
2017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2019{
2020 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2021 (__v16si)
2022 _mm512_setzero_si512 (),
2023 (__mmask16) __U);
2024}
2025
2026extern __inline __m512i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_cvtepi8_epi64 (__m128i __A)
2029{
2030 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2031 (__v8di)
4271e5cb 2032 _mm512_undefined_epi32 (),
756c5857
AI
2033 (__mmask8) -1);
2034}
2035
2036extern __inline __m512i
2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2039{
2040 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2041 (__v8di) __W,
2042 (__mmask8) __U);
2043}
2044
2045extern __inline __m512i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2048{
2049 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2050 (__v8di)
2051 _mm512_setzero_si512 (),
2052 (__mmask8) __U);
2053}
2054
2055extern __inline __m512i
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm512_cvtepi16_epi32 (__m256i __A)
2058{
2059 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2060 (__v16si)
4271e5cb 2061 _mm512_undefined_epi32 (),
756c5857
AI
2062 (__mmask16) -1);
2063}
2064
2065extern __inline __m512i
2066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2068{
2069 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2070 (__v16si) __W,
2071 (__mmask16) __U);
2072}
2073
2074extern __inline __m512i
2075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2076_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2077{
2078 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2079 (__v16si)
2080 _mm512_setzero_si512 (),
2081 (__mmask16) __U);
2082}
2083
2084extern __inline __m512i
2085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm512_cvtepi16_epi64 (__m128i __A)
2087{
2088 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2089 (__v8di)
4271e5cb 2090 _mm512_undefined_epi32 (),
756c5857
AI
2091 (__mmask8) -1);
2092}
2093
2094extern __inline __m512i
2095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2096_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2097{
2098 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2099 (__v8di) __W,
2100 (__mmask8) __U);
2101}
2102
2103extern __inline __m512i
2104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2105_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2106{
2107 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2108 (__v8di)
2109 _mm512_setzero_si512 (),
2110 (__mmask8) __U);
2111}
2112
2113extern __inline __m512i
2114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115_mm512_cvtepi32_epi64 (__m256i __X)
2116{
2117 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2118 (__v8di)
4271e5cb 2119 _mm512_undefined_epi32 (),
756c5857
AI
2120 (__mmask8) -1);
2121}
2122
2123extern __inline __m512i
2124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2125_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2126{
2127 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2128 (__v8di) __W,
2129 (__mmask8) __U);
2130}
2131
2132extern __inline __m512i
2133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2135{
2136 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2137 (__v8di)
2138 _mm512_setzero_si512 (),
2139 (__mmask8) __U);
2140}
2141
2142extern __inline __m512i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm512_cvtepu8_epi32 (__m128i __A)
2145{
2146 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2147 (__v16si)
4271e5cb 2148 _mm512_undefined_epi32 (),
756c5857
AI
2149 (__mmask16) -1);
2150}
2151
2152extern __inline __m512i
2153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2155{
2156 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2157 (__v16si) __W,
2158 (__mmask16) __U);
2159}
2160
2161extern __inline __m512i
2162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2164{
2165 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2166 (__v16si)
2167 _mm512_setzero_si512 (),
2168 (__mmask16) __U);
2169}
2170
2171extern __inline __m512i
2172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173_mm512_cvtepu8_epi64 (__m128i __A)
2174{
2175 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2176 (__v8di)
4271e5cb 2177 _mm512_undefined_epi32 (),
756c5857
AI
2178 (__mmask8) -1);
2179}
2180
2181extern __inline __m512i
2182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2184{
2185 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2186 (__v8di) __W,
2187 (__mmask8) __U);
2188}
2189
2190extern __inline __m512i
2191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2193{
2194 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2195 (__v8di)
2196 _mm512_setzero_si512 (),
2197 (__mmask8) __U);
2198}
2199
2200extern __inline __m512i
2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202_mm512_cvtepu16_epi32 (__m256i __A)
2203{
2204 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2205 (__v16si)
4271e5cb 2206 _mm512_undefined_epi32 (),
756c5857
AI
2207 (__mmask16) -1);
2208}
2209
2210extern __inline __m512i
2211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2213{
2214 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2215 (__v16si) __W,
2216 (__mmask16) __U);
2217}
2218
2219extern __inline __m512i
2220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2222{
2223 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2224 (__v16si)
2225 _mm512_setzero_si512 (),
2226 (__mmask16) __U);
2227}
2228
2229extern __inline __m512i
2230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231_mm512_cvtepu16_epi64 (__m128i __A)
2232{
2233 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2234 (__v8di)
4271e5cb 2235 _mm512_undefined_epi32 (),
756c5857
AI
2236 (__mmask8) -1);
2237}
2238
2239extern __inline __m512i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2242{
2243 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2244 (__v8di) __W,
2245 (__mmask8) __U);
2246}
2247
2248extern __inline __m512i
2249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2251{
2252 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2253 (__v8di)
2254 _mm512_setzero_si512 (),
2255 (__mmask8) __U);
2256}
2257
2258extern __inline __m512i
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_cvtepu32_epi64 (__m256i __X)
2261{
2262 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2263 (__v8di)
4271e5cb 2264 _mm512_undefined_epi32 (),
756c5857
AI
2265 (__mmask8) -1);
2266}
2267
2268extern __inline __m512i
2269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2271{
2272 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2273 (__v8di) __W,
2274 (__mmask8) __U);
2275}
2276
2277extern __inline __m512i
2278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2280{
2281 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2282 (__v8di)
2283 _mm512_setzero_si512 (),
2284 (__mmask8) __U);
2285}
2286
2287#ifdef __OPTIMIZE__
2288extern __inline __m512d
2289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2291{
2292 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2293 (__v8df) __B,
2294 (__v8df)
0b192937 2295 _mm512_undefined_pd (),
756c5857
AI
2296 (__mmask8) -1, __R);
2297}
2298
2299extern __inline __m512d
2300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2302 __m512d __B, const int __R)
2303{
2304 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2305 (__v8df) __B,
2306 (__v8df) __W,
2307 (__mmask8) __U, __R);
2308}
2309
2310extern __inline __m512d
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2313 const int __R)
2314{
2315 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2316 (__v8df) __B,
2317 (__v8df)
2318 _mm512_setzero_pd (),
2319 (__mmask8) __U, __R);
2320}
2321
2322extern __inline __m512
2323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2325{
2326 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2327 (__v16sf) __B,
2328 (__v16sf)
0b192937 2329 _mm512_undefined_ps (),
756c5857
AI
2330 (__mmask16) -1, __R);
2331}
2332
2333extern __inline __m512
2334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2336 __m512 __B, const int __R)
2337{
2338 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2339 (__v16sf) __B,
2340 (__v16sf) __W,
2341 (__mmask16) __U, __R);
2342}
2343
2344extern __inline __m512
2345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2347{
2348 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2349 (__v16sf) __B,
2350 (__v16sf)
2351 _mm512_setzero_ps (),
2352 (__mmask16) __U, __R);
2353}
2354
2355extern __inline __m512d
2356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2358{
2359 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2360 (__v8df) __B,
2361 (__v8df)
0b192937 2362 _mm512_undefined_pd (),
756c5857
AI
2363 (__mmask8) -1, __R);
2364}
2365
2366extern __inline __m512d
2367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2368_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2369 __m512d __B, const int __R)
2370{
2371 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2372 (__v8df) __B,
2373 (__v8df) __W,
2374 (__mmask8) __U, __R);
2375}
2376
2377extern __inline __m512d
2378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2380 const int __R)
2381{
2382 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2383 (__v8df) __B,
2384 (__v8df)
2385 _mm512_setzero_pd (),
2386 (__mmask8) __U, __R);
2387}
2388
2389extern __inline __m512
2390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2391_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2392{
2393 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2394 (__v16sf) __B,
2395 (__v16sf)
0b192937 2396 _mm512_undefined_ps (),
756c5857
AI
2397 (__mmask16) -1, __R);
2398}
2399
2400extern __inline __m512
2401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2403 __m512 __B, const int __R)
2404{
2405 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2406 (__v16sf) __B,
2407 (__v16sf) __W,
2408 (__mmask16) __U, __R);
2409}
2410
2411extern __inline __m512
2412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2413_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2414{
2415 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2416 (__v16sf) __B,
2417 (__v16sf)
2418 _mm512_setzero_ps (),
2419 (__mmask16) __U, __R);
2420}
2421#else
/* Macro form of _mm512_add_round_pd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, \
      (__v8df) _mm512_undefined_pd (), -1, C))
756c5857
AI
2424
/* Macro form of _mm512_mask_add_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, W, U, C))
2427
/* Macro form of _mm512_maskz_add_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, \
      (__v8df) _mm512_setzero_pd (), U, C))
2430
/* Macro form of _mm512_add_round_ps, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, \
      (__v16sf) _mm512_undefined_ps (), -1, C))
756c5857
AI
2433
/* Macro form of _mm512_mask_add_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, W, U, C))
2436
/* Macro form of _mm512_maskz_add_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, \
      (__v16sf) _mm512_setzero_ps (), U, C))
2439
/* Macro form of _mm512_sub_round_pd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, \
      (__v8df) _mm512_undefined_pd (), -1, C))
756c5857
AI
2442
/* Macro form of _mm512_mask_sub_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, W, U, C))
2445
/* Macro form of _mm512_maskz_sub_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, \
      (__v8df) _mm512_setzero_pd (), U, C))
2448
/* Macro form of _mm512_sub_round_ps, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, \
      (__v16sf) _mm512_undefined_ps (), -1, C))
756c5857
AI
2451
/* Macro form of _mm512_mask_sub_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, W, U, C))
2454
/* Macro form of _mm512_maskz_sub_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, \
      (__v16sf) _mm512_setzero_ps (), U, C))
2457#endif
2458
2459#ifdef __OPTIMIZE__
2460extern __inline __m512d
2461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2462_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2463{
2464 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2465 (__v8df) __B,
2466 (__v8df)
0b192937 2467 _mm512_undefined_pd (),
756c5857
AI
2468 (__mmask8) -1, __R);
2469}
2470
2471extern __inline __m512d
2472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2473_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2474 __m512d __B, const int __R)
2475{
2476 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2477 (__v8df) __B,
2478 (__v8df) __W,
2479 (__mmask8) __U, __R);
2480}
2481
2482extern __inline __m512d
2483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2484_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2485 const int __R)
2486{
2487 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2488 (__v8df) __B,
2489 (__v8df)
2490 _mm512_setzero_pd (),
2491 (__mmask8) __U, __R);
2492}
2493
2494extern __inline __m512
2495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2497{
2498 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2499 (__v16sf) __B,
2500 (__v16sf)
0b192937 2501 _mm512_undefined_ps (),
756c5857
AI
2502 (__mmask16) -1, __R);
2503}
2504
2505extern __inline __m512
2506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2507_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2508 __m512 __B, const int __R)
2509{
2510 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2511 (__v16sf) __B,
2512 (__v16sf) __W,
2513 (__mmask16) __U, __R);
2514}
2515
2516extern __inline __m512
2517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2518_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2519{
2520 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2521 (__v16sf) __B,
2522 (__v16sf)
2523 _mm512_setzero_ps (),
2524 (__mmask16) __U, __R);
2525}
2526
2527extern __inline __m512d
2528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2529_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2530{
2531 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2532 (__v8df) __V,
2533 (__v8df)
0b192937 2534 _mm512_undefined_pd (),
756c5857
AI
2535 (__mmask8) -1, __R);
2536}
2537
2538extern __inline __m512d
2539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2540_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2541 __m512d __V, const int __R)
2542{
2543 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2544 (__v8df) __V,
2545 (__v8df) __W,
2546 (__mmask8) __U, __R);
2547}
2548
2549extern __inline __m512d
2550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2551_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2552 const int __R)
2553{
2554 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2555 (__v8df) __V,
2556 (__v8df)
2557 _mm512_setzero_pd (),
2558 (__mmask8) __U, __R);
2559}
2560
2561extern __inline __m512
2562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2563_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2564{
2565 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2566 (__v16sf) __B,
2567 (__v16sf)
0b192937 2568 _mm512_undefined_ps (),
756c5857
AI
2569 (__mmask16) -1, __R);
2570}
2571
2572extern __inline __m512
2573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2574_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2575 __m512 __B, const int __R)
2576{
2577 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2578 (__v16sf) __B,
2579 (__v16sf) __W,
2580 (__mmask16) __U, __R);
2581}
2582
2583extern __inline __m512
2584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2585_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2586{
2587 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2588 (__v16sf) __B,
2589 (__v16sf)
2590 _mm512_setzero_ps (),
2591 (__mmask16) __U, __R);
2592}
2593
075691af
AI
2594extern __inline __m128d
2595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2597{
2598 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2599 (__v2df) __B,
2600 __R);
2601}
2602
f4ee3a9e
UB
2603extern __inline __m128d
2604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2606 __m128d __B, const int __R)
2607{
2608 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2609 (__v2df) __B,
2610 (__v2df) __W,
2611 (__mmask8) __U, __R);
2612}
2613
2614extern __inline __m128d
2615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2616_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2617 const int __R)
2618{
2619 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2620 (__v2df) __B,
2621 (__v2df)
2622 _mm_setzero_pd (),
2623 (__mmask8) __U, __R);
2624}
2625
075691af
AI
2626extern __inline __m128
2627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2628_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2629{
2630 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2631 (__v4sf) __B,
2632 __R);
2633}
2634
f4ee3a9e
UB
2635extern __inline __m128
2636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2637_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2638 __m128 __B, const int __R)
2639{
2640 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2641 (__v4sf) __B,
2642 (__v4sf) __W,
2643 (__mmask8) __U, __R);
2644}
2645
2646extern __inline __m128
2647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2648_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2649 const int __R)
2650{
2651 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2652 (__v4sf) __B,
2653 (__v4sf)
2654 _mm_setzero_ps (),
2655 (__mmask8) __U, __R);
2656}
2657
075691af
AI
2658extern __inline __m128d
2659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2661{
2662 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2663 (__v2df) __B,
2664 __R);
2665}
2666
f4ee3a9e
UB
2667extern __inline __m128d
2668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2670 __m128d __B, const int __R)
2671{
2672 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2673 (__v2df) __B,
2674 (__v2df) __W,
2675 (__mmask8) __U, __R);
2676}
2677
2678extern __inline __m128d
2679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2681 const int __R)
2682{
2683 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2684 (__v2df) __B,
2685 (__v2df)
2686 _mm_setzero_pd (),
2687 (__mmask8) __U, __R);
2688}
2689
075691af
AI
2690extern __inline __m128
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2693{
2694 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2695 (__v4sf) __B,
2696 __R);
2697}
2698
f4ee3a9e
UB
2699extern __inline __m128
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2702 __m128 __B, const int __R)
2703{
2704 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2705 (__v4sf) __B,
2706 (__v4sf) __W,
2707 (__mmask8) __U, __R);
2708}
2709
2710extern __inline __m128
2711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2713 const int __R)
2714{
2715 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2716 (__v4sf) __B,
2717 (__v4sf)
2718 _mm_setzero_ps (),
2719 (__mmask8) __U, __R);
2720}
2721
756c5857
AI
2722#else
/* Macro form of _mm512_mul_round_pd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask (A, B, \
      (__v8df) _mm512_undefined_pd (), -1, C))
756c5857
AI
2725
/* Macro form of _mm512_mask_mul_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask (A, B, W, U, C))
2728
/* Macro form of _mm512_maskz_mul_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask (A, B, \
      (__v8df) _mm512_setzero_pd (), U, C))
2731
/* Macro form of _mm512_mul_round_ps, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask (A, B, \
      (__v16sf) _mm512_undefined_ps (), -1, C))
756c5857
AI
2734
/* Macro form of _mm512_mask_mul_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask (A, B, W, U, C))
2737
/* Macro form of _mm512_maskz_mul_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask (A, B, \
      (__v16sf) _mm512_setzero_ps (), U, C))
2740
/* Macro form of _mm512_div_round_pd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask (A, B, \
      (__v8df) _mm512_undefined_pd (), -1, C))
756c5857
AI
2743
/* Macro form of _mm512_mask_div_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask (A, B, W, U, C))
2746
/* Macro form of _mm512_maskz_div_round_pd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask (A, B, \
      (__v8df) _mm512_setzero_pd (), U, C))
2749
/* Macro form of _mm512_div_round_ps, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask (A, B, \
      (__v16sf) _mm512_undefined_ps (), -1, C))
756c5857
AI
2752
/* Macro form of _mm512_mask_div_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask (A, B, W, U, C))
2755
/* Macro form of _mm512_maskz_div_round_ps, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask (A, B, \
      (__v16sf) _mm512_setzero_ps (), U, C))
075691af
AI
2758
/* Macro form of _mm_mul_round_sd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round (A, B, C))
2761
f4ee3a9e
UB
/* Macro form of _mm_mask_mul_round_sd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mask_mul_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round (A, B, W, U, C))
2764
/* Macro form of _mm_maskz_mul_round_sd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm_maskz_mul_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round (A, B, \
      (__v2df) _mm_setzero_pd (), U, C))
2767
075691af
AI
/* Macro form of _mm_mul_round_ss, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round (A, B, C))
2770
f4ee3a9e
UB
/* Macro form of _mm_mask_mul_round_ss, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mask_mul_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round (A, B, W, U, C))
2773
/* Macro form of _mm_maskz_mul_round_ss, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm_maskz_mul_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round (A, B, \
      (__v4sf) _mm_setzero_ps (), U, C))
2776
075691af
AI
/* Macro form of _mm_div_round_sd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round (A, B, C))
2779
f4ee3a9e
UB
/* Macro form of _mm_mask_div_round_sd, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mask_div_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round (A, B, W, U, C))
2782
/* Macro form of _mm_maskz_div_round_sd, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm_maskz_div_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round (A, B, \
      (__v2df) _mm_setzero_pd (), U, C))
2785
075691af
AI
/* Macro form of _mm_div_round_ss, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round (A, B, C))
f4ee3a9e
UB
2788
/* Macro form of _mm_mask_div_round_ss, used when __OPTIMIZE__ is not defined.
   Fully parenthesized so the cast binds before operators at the use site.  */
#define _mm_mask_div_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round (A, B, W, U, C))
2791
/* Macro form of _mm_maskz_div_round_ss, used when __OPTIMIZE__ is not
   defined.  Fully parenthesized so the cast binds before operators at the
   use site.  */
#define _mm_maskz_div_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round (A, B, \
      (__v4sf) _mm_setzero_ps (), U, C))
2794
756c5857
AI
2795#endif
2796
2797#ifdef __OPTIMIZE__
2798extern __inline __m512d
2799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2801{
2802 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2803 (__v8df) __B,
2804 (__v8df)
0b192937 2805 _mm512_undefined_pd (),
756c5857
AI
2806 (__mmask8) -1, __R);
2807}
2808
2809extern __inline __m512d
2810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2812 __m512d __B, const int __R)
2813{
2814 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2815 (__v8df) __B,
2816 (__v8df) __W,
2817 (__mmask8) __U, __R);
2818}
2819
2820extern __inline __m512d
2821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2823 const int __R)
2824{
2825 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2826 (__v8df) __B,
2827 (__v8df)
2828 _mm512_setzero_pd (),
2829 (__mmask8) __U, __R);
2830}
2831
2832extern __inline __m512
2833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2835{
2836 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2837 (__v16sf) __B,
2838 (__v16sf)
0b192937 2839 _mm512_undefined_ps (),
756c5857
AI
2840 (__mmask16) -1, __R);
2841}
2842
2843extern __inline __m512
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2846 __m512 __B, const int __R)
2847{
2848 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2849 (__v16sf) __B,
2850 (__v16sf) __W,
2851 (__mmask16) __U, __R);
2852}
2853
2854extern __inline __m512
2855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2857{
2858 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2859 (__v16sf) __B,
2860 (__v16sf)
2861 _mm512_setzero_ps (),
2862 (__mmask16) __U, __R);
2863}
2864
2865extern __inline __m512d
2866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2867_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2868{
2869 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2870 (__v8df) __B,
2871 (__v8df)
0b192937 2872 _mm512_undefined_pd (),
756c5857
AI
2873 (__mmask8) -1, __R);
2874}
2875
2876extern __inline __m512d
2877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2878_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2879 __m512d __B, const int __R)
2880{
2881 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2882 (__v8df) __B,
2883 (__v8df) __W,
2884 (__mmask8) __U, __R);
2885}
2886
2887extern __inline __m512d
2888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2889_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2890 const int __R)
2891{
2892 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2893 (__v8df) __B,
2894 (__v8df)
2895 _mm512_setzero_pd (),
2896 (__mmask8) __U, __R);
2897}
2898
2899extern __inline __m512
2900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2901_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2902{
2903 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2904 (__v16sf) __B,
2905 (__v16sf)
0b192937 2906 _mm512_undefined_ps (),
756c5857
AI
2907 (__mmask16) -1, __R);
2908}
2909
2910extern __inline __m512
2911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2912_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2913 __m512 __B, const int __R)
2914{
2915 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2916 (__v16sf) __B,
2917 (__v16sf) __W,
2918 (__mmask16) __U, __R);
2919}
2920
2921extern __inline __m512
2922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2924{
2925 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2926 (__v16sf) __B,
2927 (__v16sf)
2928 _mm512_setzero_ps (),
2929 (__mmask16) __U, __R);
2930}
2931#else
/* Macro form: __builtin_ia32_maxpd512_mask with mask -1 and an undefined
   vector as the third operand.  */
#define _mm512_max_round_pd(A, B, R) \
  (__m512d) __builtin_ia32_maxpd512_mask (A, B, (__v8df) _mm512_undefined_pd (), -1, R)
756c5857
AI
2934
/* Macro form: __builtin_ia32_maxpd512_mask with W as the third operand and
   U as the mask.  */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d) __builtin_ia32_maxpd512_mask (A, B, W, U, R)
2937
/* Macro form: __builtin_ia32_maxpd512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d) __builtin_ia32_maxpd512_mask (A, B, (__v8df) _mm512_setzero_pd (), U, R)
2940
/* Macro form: __builtin_ia32_maxps512_mask with mask -1.  The third operand
   is an undefined *float* vector (_mm512_undefined_ps), matching the other
   single-precision macros in this group rather than the double-precision
   _mm512_undefined_pd.  */
#define _mm512_max_round_ps(A, B, R) \
  (__m512) __builtin_ia32_maxps512_mask (A, B, (__v16sf) _mm512_undefined_ps (), -1, R)
756c5857
AI
2943
/* Macro form: __builtin_ia32_maxps512_mask with W as the third operand and
   U as the mask.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512) __builtin_ia32_maxps512_mask (A, B, W, U, R)
2946
/* Macro form: __builtin_ia32_maxps512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512) __builtin_ia32_maxps512_mask (A, B, (__v16sf) _mm512_setzero_ps (), U, R)
2949
/* Macro form: __builtin_ia32_minpd512_mask with mask -1 and an undefined
   vector as the third operand.  */
#define _mm512_min_round_pd(A, B, R) \
  (__m512d) __builtin_ia32_minpd512_mask (A, B, (__v8df) _mm512_undefined_pd (), -1, R)
756c5857
AI
2952
/* Macro form: __builtin_ia32_minpd512_mask with W as the third operand and
   U as the mask.  */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d) __builtin_ia32_minpd512_mask (A, B, W, U, R)
2955
/* Macro form: __builtin_ia32_minpd512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d) __builtin_ia32_minpd512_mask (A, B, (__v8df) _mm512_setzero_pd (), U, R)
2958
/* Macro form: __builtin_ia32_minps512_mask with mask -1 and an undefined
   vector as the third operand.  */
#define _mm512_min_round_ps(A, B, R) \
  (__m512) __builtin_ia32_minps512_mask (A, B, (__v16sf) _mm512_undefined_ps (), -1, R)
756c5857
AI
2961
/* Macro form: __builtin_ia32_minps512_mask with W as the third operand and
   U as the mask.  */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512) __builtin_ia32_minps512_mask (A, B, W, U, R)
2964
/* Macro form: __builtin_ia32_minps512_mask with a zero vector as the third
   operand and U as the mask.  */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512) __builtin_ia32_minps512_mask (A, B, (__v16sf) _mm512_setzero_ps (), U, R)
2967#endif
2968
2969#ifdef __OPTIMIZE__
2970extern __inline __m512d
2971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2972_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2973{
2974 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2975 (__v8df) __B,
2976 (__v8df)
0b192937 2977 _mm512_undefined_pd (),
756c5857
AI
2978 (__mmask8) -1, __R);
2979}
2980
2981extern __inline __m512d
2982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2984 __m512d __B, const int __R)
2985{
2986 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df) __W,
2989 (__mmask8) __U, __R);
2990}
2991
2992extern __inline __m512d
2993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2995 const int __R)
2996{
2997 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2998 (__v8df) __B,
2999 (__v8df)
3000 _mm512_setzero_pd (),
3001 (__mmask8) __U, __R);
3002}
3003
3004extern __inline __m512
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3007{
3008 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3009 (__v16sf) __B,
3010 (__v16sf)
0b192937 3011 _mm512_undefined_ps (),
756c5857
AI
3012 (__mmask16) -1, __R);
3013}
3014
3015extern __inline __m512
3016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3017_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3018 __m512 __B, const int __R)
3019{
3020 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __W,
3023 (__mmask16) __U, __R);
3024}
3025
3026extern __inline __m512
3027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3029 const int __R)
3030{
3031 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3032 (__v16sf) __B,
3033 (__v16sf)
3034 _mm512_setzero_ps (),
3035 (__mmask16) __U, __R);
3036}
3037
075691af
AI
3038extern __inline __m128d
3039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3041{
3042 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
3043 (__v2df) __B,
3044 __R);
3045}
3046
3047extern __inline __m128
3048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3050{
3051 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
3052 (__v4sf) __B,
3053 __R);
3054}
756c5857
AI
3055#else
/* Macro form: __builtin_ia32_scalefpd512_mask with mask -1 and an
   undefined vector as the third operand.  */
#define _mm512_scalef_round_pd(A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask (A, B, (__v8df) _mm512_undefined_pd (), -1, C)
756c5857
AI
3058
/* Macro form: __builtin_ia32_scalefpd512_mask with W as the third operand
   and U as the mask.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask (A, B, W, U, C)
3061
/* Macro form: __builtin_ia32_scalefpd512_mask with a zero vector as the
   third operand and U as the mask.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  (__m512d) __builtin_ia32_scalefpd512_mask (A, B, (__v8df) _mm512_setzero_pd (), U, C)
3064
/* Macro form: __builtin_ia32_scalefps512_mask with mask -1 and an
   undefined vector as the third operand.  */
#define _mm512_scalef_round_ps(A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask (A, B, (__v16sf) _mm512_undefined_ps (), -1, C)
756c5857
AI
3067
/* Macro form: __builtin_ia32_scalefps512_mask with W as the third operand
   and U as the mask.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask (A, B, W, U, C)
3070
/* Macro form: __builtin_ia32_scalefps512_mask with a zero vector as the
   third operand and U as the mask.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  (__m512) __builtin_ia32_scalefps512_mask (A, B, (__v16sf) _mm512_setzero_ps (), U, C)
075691af
AI
3073
/* Macro form: forwards A, B and C to __builtin_ia32_scalefsd_round.  */
#define _mm_scalef_round_sd(A, B, C) \
  (__m128d) __builtin_ia32_scalefsd_round (A, B, C)
3076
/* Macro form: forwards A, B and C to __builtin_ia32_scalefss_round.  */
#define _mm_scalef_round_ss(A, B, C) \
  (__m128) __builtin_ia32_scalefss_round (A, B, C)
756c5857
AI
3079#endif
3080
3081#ifdef __OPTIMIZE__
3082extern __inline __m512d
3083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3084_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3085{
3086 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3087 (__v8df) __B,
3088 (__v8df) __C,
3089 (__mmask8) -1, __R);
3090}
3091
3092extern __inline __m512d
3093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3095 __m512d __C, const int __R)
3096{
3097 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3098 (__v8df) __B,
3099 (__v8df) __C,
3100 (__mmask8) __U, __R);
3101}
3102
3103extern __inline __m512d
3104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3106 __mmask8 __U, const int __R)
3107{
3108 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3109 (__v8df) __B,
3110 (__v8df) __C,
3111 (__mmask8) __U, __R);
3112}
3113
3114extern __inline __m512d
3115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3116_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3117 __m512d __C, const int __R)
3118{
3119 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3120 (__v8df) __B,
3121 (__v8df) __C,
3122 (__mmask8) __U, __R);
3123}
3124
3125extern __inline __m512
3126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3128{
3129 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3130 (__v16sf) __B,
3131 (__v16sf) __C,
3132 (__mmask16) -1, __R);
3133}
3134
3135extern __inline __m512
3136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3138 __m512 __C, const int __R)
3139{
3140 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3141 (__v16sf) __B,
3142 (__v16sf) __C,
3143 (__mmask16) __U, __R);
3144}
3145
3146extern __inline __m512
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3149 __mmask16 __U, const int __R)
3150{
3151 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3152 (__v16sf) __B,
3153 (__v16sf) __C,
3154 (__mmask16) __U, __R);
3155}
3156
3157extern __inline __m512
3158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3160 __m512 __C, const int __R)
3161{
3162 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3163 (__v16sf) __B,
3164 (__v16sf) __C,
3165 (__mmask16) __U, __R);
3166}
3167
3168extern __inline __m512d
3169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3171{
3172 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3173 (__v8df) __B,
3174 -(__v8df) __C,
3175 (__mmask8) -1, __R);
3176}
3177
3178extern __inline __m512d
3179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3181 __m512d __C, const int __R)
3182{
3183 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3184 (__v8df) __B,
3185 -(__v8df) __C,
3186 (__mmask8) __U, __R);
3187}
3188
3189extern __inline __m512d
3190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3191_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3192 __mmask8 __U, const int __R)
3193{
3194 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3195 (__v8df) __B,
3196 (__v8df) __C,
3197 (__mmask8) __U, __R);
3198}
3199
3200extern __inline __m512d
3201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3202_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3203 __m512d __C, const int __R)
3204{
3205 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3206 (__v8df) __B,
3207 -(__v8df) __C,
3208 (__mmask8) __U, __R);
3209}
3210
3211extern __inline __m512
3212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3214{
3215 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3216 (__v16sf) __B,
3217 -(__v16sf) __C,
3218 (__mmask16) -1, __R);
3219}
3220
3221extern __inline __m512
3222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3224 __m512 __C, const int __R)
3225{
3226 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3227 (__v16sf) __B,
3228 -(__v16sf) __C,
3229 (__mmask16) __U, __R);
3230}
3231
3232extern __inline __m512
3233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3235 __mmask16 __U, const int __R)
3236{
3237 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3238 (__v16sf) __B,
3239 (__v16sf) __C,
3240 (__mmask16) __U, __R);
3241}
3242
3243extern __inline __m512
3244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3246 __m512 __C, const int __R)
3247{
3248 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3249 (__v16sf) __B,
3250 -(__v16sf) __C,
3251 (__mmask16) __U, __R);
3252}
3253
3254extern __inline __m512d
3255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3257{
3258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3259 (__v8df) __B,
3260 (__v8df) __C,
3261 (__mmask8) -1, __R);
3262}
3263
3264extern __inline __m512d
3265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3267 __m512d __C, const int __R)
3268{
3269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3270 (__v8df) __B,
3271 (__v8df) __C,
3272 (__mmask8) __U, __R);
3273}
3274
3275extern __inline __m512d
3276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3278 __mmask8 __U, const int __R)
3279{
3280 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3281 (__v8df) __B,
3282 (__v8df) __C,
3283 (__mmask8) __U, __R);
3284}
3285
3286extern __inline __m512d
3287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3288_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3289 __m512d __C, const int __R)
3290{
3291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3292 (__v8df) __B,
3293 (__v8df) __C,
3294 (__mmask8) __U, __R);
3295}
3296
3297extern __inline __m512
3298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3299_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3300{
3301 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3302 (__v16sf) __B,
3303 (__v16sf) __C,
3304 (__mmask16) -1, __R);
3305}
3306
3307extern __inline __m512
3308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3309_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3310 __m512 __C, const int __R)
3311{
3312 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3313 (__v16sf) __B,
3314 (__v16sf) __C,
3315 (__mmask16) __U, __R);
3316}
3317
3318extern __inline __m512
3319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3321 __mmask16 __U, const int __R)
3322{
3323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3324 (__v16sf) __B,
3325 (__v16sf) __C,
3326 (__mmask16) __U, __R);
3327}
3328
3329extern __inline __m512
3330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3332 __m512 __C, const int __R)
3333{
3334 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3335 (__v16sf) __B,
3336 (__v16sf) __C,
3337 (__mmask16) __U, __R);
3338}
3339
3340extern __inline __m512d
3341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3343{
3344 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 -(__v8df) __C,
3347 (__mmask8) -1, __R);
3348}
3349
3350extern __inline __m512d
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3353 __m512d __C, const int __R)
3354{
3355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3356 (__v8df) __B,
3357 -(__v8df) __C,
3358 (__mmask8) __U, __R);
3359}
3360
3361extern __inline __m512d
3362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3364 __mmask8 __U, const int __R)
3365{
3366 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3367 (__v8df) __B,
3368 (__v8df) __C,
3369 (__mmask8) __U, __R);
3370}
3371
3372extern __inline __m512d
3373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3375 __m512d __C, const int __R)
3376{
3377 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3378 (__v8df) __B,
3379 -(__v8df) __C,
3380 (__mmask8) __U, __R);
3381}
3382
3383extern __inline __m512
3384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3385_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3386{
3387 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3388 (__v16sf) __B,
3389 -(__v16sf) __C,
3390 (__mmask16) -1, __R);
3391}
3392
3393extern __inline __m512
3394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3396 __m512 __C, const int __R)
3397{
3398 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3399 (__v16sf) __B,
3400 -(__v16sf) __C,
3401 (__mmask16) __U, __R);
3402}
3403
3404extern __inline __m512
3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3407 __mmask16 __U, const int __R)
3408{
3409 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3410 (__v16sf) __B,
3411 (__v16sf) __C,
3412 (__mmask16) __U, __R);
3413}
3414
3415extern __inline __m512
3416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3418 __m512 __C, const int __R)
3419{
3420 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3421 (__v16sf) __B,
3422 -(__v16sf) __C,
3423 (__mmask16) __U, __R);
3424}
3425
3426extern __inline __m512d
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3429{
3430 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3431 (__v8df) __B,
3432 (__v8df) __C,
3433 (__mmask8) -1, __R);
3434}
3435
3436extern __inline __m512d
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3439 __m512d __C, const int __R)
3440{
3441 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3442 (__v8df) __B,
3443 (__v8df) __C,
3444 (__mmask8) __U, __R);
3445}
3446
3447extern __inline __m512d
3448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3450 __mmask8 __U, const int __R)
3451{
3452 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3453 (__v8df) __B,
3454 (__v8df) __C,
3455 (__mmask8) __U, __R);
3456}
3457
3458extern __inline __m512d
3459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3461 __m512d __C, const int __R)
3462{
3463 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3464 (__v8df) __B,
3465 (__v8df) __C,
3466 (__mmask8) __U, __R);
3467}
3468
3469extern __inline __m512
3470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3471_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3472{
3473 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3474 (__v16sf) __B,
3475 (__v16sf) __C,
3476 (__mmask16) -1, __R);
3477}
3478
3479extern __inline __m512
3480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3482 __m512 __C, const int __R)
3483{
3484 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3485 (__v16sf) __B,
3486 (__v16sf) __C,
3487 (__mmask16) __U, __R);
3488}
3489
3490extern __inline __m512
3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3493 __mmask16 __U, const int __R)
3494{
3495 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3496 (__v16sf) __B,
3497 (__v16sf) __C,
3498 (__mmask16) __U, __R);
3499}
3500
3501extern __inline __m512
3502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3503_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3504 __m512 __C, const int __R)
3505{
3506 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3507 (__v16sf) __B,
3508 (__v16sf) __C,
3509 (__mmask16) __U, __R);
3510}
3511
3512extern __inline __m512d
3513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3514_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3515{
3516 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3517 (__v8df) __B,
3518 -(__v8df) __C,
3519 (__mmask8) -1, __R);
3520}
3521
3522extern __inline __m512d
3523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3524_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3525 __m512d __C, const int __R)
3526{
3527 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3528 (__v8df) __B,
3529 (__v8df) __C,
3530 (__mmask8) __U, __R);
3531}
3532
3533extern __inline __m512d
3534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3536 __mmask8 __U, const int __R)
3537{
3538 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3539 (__v8df) __B,
3540 (__v8df) __C,
3541 (__mmask8) __U, __R);
3542}
3543
3544extern __inline __m512d
3545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3547 __m512d __C, const int __R)
3548{
3549 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3550 (__v8df) __B,
3551 -(__v8df) __C,
3552 (__mmask8) __U, __R);
3553}
3554
3555extern __inline __m512
3556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3558{
3559 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3560 (__v16sf) __B,
3561 -(__v16sf) __C,
3562 (__mmask16) -1, __R);
3563}
3564
3565extern __inline __m512
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3568 __m512 __C, const int __R)
3569{
3570 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3571 (__v16sf) __B,
3572 (__v16sf) __C,
3573 (__mmask16) __U, __R);
3574}
3575
3576extern __inline __m512
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3579 __mmask16 __U, const int __R)
3580{
3581 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3582 (__v16sf) __B,
3583 (__v16sf) __C,
3584 (__mmask16) __U, __R);
3585}
3586
3587extern __inline __m512
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3590 __m512 __C, const int __R)
3591{
3592 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3593 (__v16sf) __B,
3594 -(__v16sf) __C,
3595 (__mmask16) __U, __R);
3596}
3597#else
/* Macro form: __builtin_ia32_vfmaddpd512_mask with mask -1.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R)
3600
/* Macro form: __builtin_ia32_vfmaddpd512_mask with mask U.  */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R)
3603
/* Macro form: __builtin_ia32_vfmaddpd512_mask3 with mask U.  */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R)
3606
/* Macro form: __builtin_ia32_vfmaddpd512_maskz with mask U.  */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R)
3609
/* Macro form: __builtin_ia32_vfmaddps512_mask with mask -1.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R)
3612
/* Macro form: __builtin_ia32_vfmaddps512_mask with mask U.  */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R)
3615
/* Macro form: __builtin_ia32_vfmaddps512_mask3 with mask U.  */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R)
3618
/* Macro form: __builtin_ia32_vfmaddps512_maskz with mask U.  */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R)
3621
/* Macro form: __builtin_ia32_vfmaddpd512_mask with C negated, mask -1.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R)
3624
/* Macro form: __builtin_ia32_vfmaddpd512_mask with C negated, mask U.  */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R)
3627
/* Macro form: __builtin_ia32_vfmsubpd512_mask3 with mask U; C is passed
   unnegated.  */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R)
3630
/* Macro form: __builtin_ia32_vfmaddpd512_maskz with C negated, mask U.  */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R)
3633
/* Macro form: __builtin_ia32_vfmaddps512_mask with C negated, mask -1.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R)
3636
/* Macro form: __builtin_ia32_vfmaddps512_mask with C negated, mask U.  */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R)
3639
/* Macro form: __builtin_ia32_vfmsubps512_mask3 with mask U; C is passed
   unnegated.  */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R)
3642
/* Macro form: __builtin_ia32_vfmaddps512_maskz with C negated, mask U.  */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R)
3645
/* Macro form: __builtin_ia32_vfmaddsubpd512_mask with mask -1.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R)
3648
/* Macro form: __builtin_ia32_vfmaddsubpd512_mask with mask U.  This must be
   the fmaddsub builtin (as in the inline definition of this intrinsic), not
   the plain __builtin_ia32_vfmaddpd512_mask, so that both build modes give
   the same result.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R)
3651
/* Macro form: __builtin_ia32_vfmaddsubpd512_mask3 with mask U.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R)
3654
/* Macro form: __builtin_ia32_vfmaddsubpd512_maskz with mask U.  */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R)
3657
/* Macro form: __builtin_ia32_vfmaddsubps512_mask with mask -1.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R)
3660
/* Macro form: __builtin_ia32_vfmaddsubps512_mask with mask U.  */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R)
3663
/* Macro form: __builtin_ia32_vfmaddsubps512_mask3 with mask U.  */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R)
3666
/* Macro form: __builtin_ia32_vfmaddsubps512_maskz with mask U.  */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R)
3669
/* Macro form: __builtin_ia32_vfmaddsubpd512_mask with C negated,
   mask -1.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R)
3672
/* Macro form: __builtin_ia32_vfmaddsubpd512_mask with C negated,
   mask U.  */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R)
3675
/* Macro form: __builtin_ia32_vfmsubaddpd512_mask3 with mask U; C is passed
   unnegated.  */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R)
3678
/* Macro form: __builtin_ia32_vfmaddsubpd512_maskz with C negated,
   mask U.  */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R)
3681
/* Macro form: __builtin_ia32_vfmaddsubps512_mask with C negated,
   mask -1.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R)
3684
/* Macro form: __builtin_ia32_vfmaddsubps512_mask with C negated,
   mask U.  */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R)
3687
/* Macro form: __builtin_ia32_vfmsubaddps512_mask3 with mask U; C is passed
   unnegated.  */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R)
3690
/* Macro form: __builtin_ia32_vfmaddsubps512_maskz with C negated,
   mask U.  */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R)
3693
/* Macro form: __builtin_ia32_vfmaddpd512_mask with A negated, mask -1.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R)
3696
/* Macro form: __builtin_ia32_vfnmaddpd512_mask with mask U.  A is passed
   unnegated, exactly as in the inline definition of this intrinsic: the
   "fnmadd" builtin is already the negated variant, so negating A here as
   well would make the macro and inline forms disagree.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R)
3699
/* Macro form: __builtin_ia32_vfmaddpd512_mask3 with A negated, mask U.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R)
3702
/* Macro form: __builtin_ia32_vfmaddpd512_maskz with A negated, mask U.  */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R)
3705
/* Macro form: __builtin_ia32_vfmaddps512_mask with A negated, mask -1.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R)
3708
/* Macro form: __builtin_ia32_vfnmaddps512_mask with mask U.  A is passed
   unnegated, exactly as in the inline definition of this intrinsic: the
   "fnmadd" builtin is already the negated variant, so negating A here as
   well would make the macro and inline forms disagree.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R)
3711
/* Macro form: __builtin_ia32_vfmaddps512_mask3 with A negated, mask U.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R)
3714
/* Macro form: __builtin_ia32_vfmaddps512_maskz with A negated, mask U.  */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R)
3717
/* Macro form: __builtin_ia32_vfmaddpd512_mask with A and C negated,
   mask -1.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R)
3720
/* Macro form: __builtin_ia32_vfnmsubpd512_mask with mask U; operands are
   passed unnegated.  */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R)
3723
/* Macro form: __builtin_ia32_vfnmsubpd512_mask3 with mask U; operands are
   passed unnegated.  */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R)
3726
/* Macro form: __builtin_ia32_vfmaddpd512_maskz with A and C negated,
   mask U.  */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R)
3729
/* Macro form: __builtin_ia32_vfmaddps512_mask with A and C negated,
   mask -1.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R)
3732
/* Macro form: __builtin_ia32_vfnmsubps512_mask with mask U; operands are
   passed unnegated.  */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R)
3735
/* Macro form: __builtin_ia32_vfnmsubps512_mask3 with mask U; operands are
   passed unnegated.  */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R)
3738
/* Macro form: __builtin_ia32_vfmaddps512_maskz with A and C negated,
   mask U.  */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R)
3741#endif
3742
3743extern __inline __m512i
3744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3745_mm512_abs_epi64 (__m512i __A)
3746{
3747 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3748 (__v8di)
4271e5cb 3749 _mm512_undefined_epi32 (),
756c5857
AI
3750 (__mmask8) -1);
3751}
3752
3753extern __inline __m512i
3754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3756{
3757 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3758 (__v8di) __W,
3759 (__mmask8) __U);
3760}
3761
3762extern __inline __m512i
3763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3765{
3766 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3767 (__v8di)
3768 _mm512_setzero_si512 (),
3769 (__mmask8) __U);
3770}
3771
3772extern __inline __m512i
3773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3774_mm512_abs_epi32 (__m512i __A)
3775{
3776 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3777 (__v16si)
4271e5cb 3778 _mm512_undefined_epi32 (),
756c5857
AI
3779 (__mmask16) -1);
3780}
3781
3782extern __inline __m512i
3783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3785{
3786 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3787 (__v16si) __W,
3788 (__mmask16) __U);
3789}
3790
3791extern __inline __m512i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3794{
3795 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3796 (__v16si)
3797 _mm512_setzero_si512 (),
3798 (__mmask16) __U);
3799}
3800
3801extern __inline __m512
3802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3803_mm512_broadcastss_ps (__m128 __A)
3804{
0b192937
UD
3805 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3806 (__v16sf)
3807 _mm512_undefined_ps (),
756c5857
AI
3808 (__mmask16) -1);
3809}
3810
3811extern __inline __m512
3812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3814{
3815 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3816 (__v16sf) __O, __M);
3817}
3818
3819extern __inline __m512
3820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3822{
3823 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3824 (__v16sf)
3825 _mm512_setzero_ps (),
3826 __M);
3827}
3828
3829extern __inline __m512d
3830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3831_mm512_broadcastsd_pd (__m128d __A)
3832{
0b192937
UD
3833 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3834 (__v8df)
3835 _mm512_undefined_pd (),
756c5857
AI
3836 (__mmask8) -1);
3837}
3838
3839extern __inline __m512d
3840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3842{
3843 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3844 (__v8df) __O, __M);
3845}
3846
3847extern __inline __m512d
3848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3849_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3850{
3851 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 __M);
3855}
3856
3857extern __inline __m512i
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm512_broadcastd_epi32 (__m128i __A)
3860{
0b192937
UD
3861 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3862 (__v16si)
4271e5cb 3863 _mm512_undefined_epi32 (),
756c5857
AI
3864 (__mmask16) -1);
3865}
3866
3867extern __inline __m512i
3868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3870{
3871 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3872 (__v16si) __O, __M);
3873}
3874
3875extern __inline __m512i
3876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3877_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3878{
3879 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3880 (__v16si)
3881 _mm512_setzero_si512 (),
3882 __M);
3883}
3884
3885extern __inline __m512i
3886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887_mm512_set1_epi32 (int __A)
3888{
0b192937
UD
3889 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3890 (__v16si)
4271e5cb 3891 _mm512_undefined_epi32 (),
756c5857
AI
3892 (__mmask16)(-1));
3893}
3894
3895extern __inline __m512i
3896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3898{
3899 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3900 __M);
3901}
3902
3903extern __inline __m512i
3904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3905_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3906{
3907 return (__m512i)
3908 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 __M);
3911}
3912
3913extern __inline __m512i
3914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915_mm512_broadcastq_epi64 (__m128i __A)
3916{
0b192937
UD
3917 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3918 (__v8di)
4271e5cb 3919 _mm512_undefined_epi32 (),
756c5857
AI
3920 (__mmask8) -1);
3921}
3922
3923extern __inline __m512i
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3926{
3927 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3928 (__v8di) __O, __M);
3929}
3930
3931extern __inline __m512i
3932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3933_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3934{
3935 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3936 (__v8di)
3937 _mm512_setzero_si512 (),
3938 __M);
3939}
3940
3941extern __inline __m512i
3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943_mm512_set1_epi64 (long long __A)
3944{
0b192937
UD
3945 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3946 (__v8di)
4271e5cb 3947 _mm512_undefined_epi32 (),
756c5857 3948 (__mmask8)(-1));
756c5857
AI
3949}
3950
3951extern __inline __m512i
3952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3954{
756c5857
AI
3955 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3956 __M);
756c5857
AI
3957}
3958
3959extern __inline __m512i
3960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3961_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3962{
756c5857
AI
3963 return (__m512i)
3964 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3965 (__v8di) _mm512_setzero_si512 (),
3966 __M);
756c5857
AI
3967}
3968
3969extern __inline __m512
3970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971_mm512_broadcast_f32x4 (__m128 __A)
3972{
0b192937
UD
3973 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3974 (__v16sf)
3975 _mm512_undefined_ps (),
756c5857
AI
3976 (__mmask16) -1);
3977}
3978
3979extern __inline __m512
3980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3981_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3982{
3983 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3984 (__v16sf) __O,
3985 __M);
3986}
3987
3988extern __inline __m512
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3991{
3992 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3993 (__v16sf)
3994 _mm512_setzero_ps (),
3995 __M);
3996}
3997
3998extern __inline __m512i
3999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4000_mm512_broadcast_i32x4 (__m128i __A)
4001{
756c5857 4002 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 4003 (__v16si)
4271e5cb 4004 _mm512_undefined_epi32 (),
756c5857
AI
4005 (__mmask16) -1);
4006}
4007
4008extern __inline __m512i
4009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4011{
4012 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4013 (__v16si) __O,
4014 __M);
4015}
4016
4017extern __inline __m512i
4018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4019_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4020{
4021 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4022 (__v16si)
4023 _mm512_setzero_si512 (),
4024 __M);
4025}
4026
4027extern __inline __m512d
4028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029_mm512_broadcast_f64x4 (__m256d __A)
4030{
756c5857 4031 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
4032 (__v8df)
4033 _mm512_undefined_pd (),
756c5857
AI
4034 (__mmask8) -1);
4035}
4036
4037extern __inline __m512d
4038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4039_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4040{
4041 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4042 (__v8df) __O,
4043 __M);
4044}
4045
4046extern __inline __m512d
4047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4049{
4050 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4051 (__v8df)
4052 _mm512_setzero_pd (),
4053 __M);
4054}
4055
4056extern __inline __m512i
4057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058_mm512_broadcast_i64x4 (__m256i __A)
4059{
756c5857 4060 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 4061 (__v8di)
4271e5cb 4062 _mm512_undefined_epi32 (),
756c5857
AI
4063 (__mmask8) -1);
4064}
4065
4066extern __inline __m512i
4067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4069{
4070 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4071 (__v8di) __O,
4072 __M);
4073}
4074
4075extern __inline __m512i
4076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4077_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4078{
4079 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4080 (__v8di)
4081 _mm512_setzero_si512 (),
4082 __M);
4083}
4084
/* Named 8-bit selector constants, used as the __mask parameter type of
   _mm512_shuffle_epi32 and its masked forms.  Each of the four letters in
   a name contributes one 2-bit field (A = 0, B = 1, C = 2, D = 3), with
   the leftmost letter in the most significant field; for example
   _MM_PERM_DCBA is 0xE4 and _MM_PERM_ABCD is 0x1B.  All 256 values from
   0x00 to 0xFF are enumerated.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4174
4175#ifdef __OPTIMIZE__
4176extern __inline __m512i
4177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4179{
4180 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4181 __mask,
4182 (__v16si)
4271e5cb 4183 _mm512_undefined_epi32 (),
756c5857
AI
4184 (__mmask16) -1);
4185}
4186
4187extern __inline __m512i
4188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4190 _MM_PERM_ENUM __mask)
4191{
4192 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4193 __mask,
4194 (__v16si) __W,
4195 (__mmask16) __U);
4196}
4197
4198extern __inline __m512i
4199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4201{
4202 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4203 __mask,
4204 (__v16si)
4205 _mm512_setzero_si512 (),
4206 (__mmask16) __U);
4207}
4208
4209extern __inline __m512i
4210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4211_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4212{
4213 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4214 (__v8di) __B, __imm,
4215 (__v8di)
4271e5cb 4216 _mm512_undefined_epi32 (),
756c5857
AI
4217 (__mmask8) -1);
4218}
4219
4220extern __inline __m512i
4221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4222_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4223 __m512i __B, const int __imm)
4224{
4225 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4226 (__v8di) __B, __imm,
4227 (__v8di) __W,
4228 (__mmask8) __U);
4229}
4230
4231extern __inline __m512i
4232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4234 const int __imm)
4235{
4236 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4237 (__v8di) __B, __imm,
4238 (__v8di)
4239 _mm512_setzero_si512 (),
4240 (__mmask8) __U);
4241}
4242
4243extern __inline __m512i
4244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4245_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4246{
4247 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4248 (__v16si) __B,
4249 __imm,
4250 (__v16si)
4271e5cb 4251 _mm512_undefined_epi32 (),
756c5857
AI
4252 (__mmask16) -1);
4253}
4254
4255extern __inline __m512i
4256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4257_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4258 __m512i __B, const int __imm)
4259{
4260 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4261 (__v16si) __B,
4262 __imm,
4263 (__v16si) __W,
4264 (__mmask16) __U);
4265}
4266
4267extern __inline __m512i
4268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4270 const int __imm)
4271{
4272 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4273 (__v16si) __B,
4274 __imm,
4275 (__v16si)
4276 _mm512_setzero_si512 (),
4277 (__mmask16) __U);
4278}
4279
4280extern __inline __m512d
4281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4282_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4283{
4284 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4285 (__v8df) __B, __imm,
4286 (__v8df)
0b192937 4287 _mm512_undefined_pd (),
756c5857
AI
4288 (__mmask8) -1);
4289}
4290
4291extern __inline __m512d
4292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4294 __m512d __B, const int __imm)
4295{
4296 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4297 (__v8df) __B, __imm,
4298 (__v8df) __W,
4299 (__mmask8) __U);
4300}
4301
4302extern __inline __m512d
4303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4305 const int __imm)
4306{
4307 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4308 (__v8df) __B, __imm,
4309 (__v8df)
4310 _mm512_setzero_pd (),
4311 (__mmask8) __U);
4312}
4313
4314extern __inline __m512
4315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4317{
4318 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4319 (__v16sf) __B, __imm,
4320 (__v16sf)
0b192937 4321 _mm512_undefined_ps (),
756c5857
AI
4322 (__mmask16) -1);
4323}
4324
4325extern __inline __m512
4326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4328 __m512 __B, const int __imm)
4329{
4330 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4331 (__v16sf) __B, __imm,
4332 (__v16sf) __W,
4333 (__mmask16) __U);
4334}
4335
4336extern __inline __m512
4337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4339 const int __imm)
4340{
4341 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4342 (__v16sf) __B, __imm,
4343 (__v16sf)
4344 _mm512_setzero_ps (),
4345 (__mmask16) __U);
4346}
4347
4348#else
/* Macro forms of the 512-bit shuffle intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to the same builtin call as the inline
   function of the same name: X/Y are the source vectors, C the selector
   (cast to int), W the vector passed in the write-mask slot, and U the
   mask.  The unmasked forms pass an undefined vector and an all-ones
   mask; the maskz forms pass a zero vector.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
      (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
      (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
      (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))

#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
      (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
      (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
      (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
      (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
      (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                         \
  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),    \
      (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_shuffle_f32x4(X, Y, C)                                  \
  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
      (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_undefined_ps(),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                       \
  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
      (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                         \
  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
      (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_setzero_ps(),\
    (__mmask16)(U)))
4435#endif
4436
4437extern __inline __m512i
4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4440{
4441 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4442 (__v16si) __B,
4443 (__v16si)
4271e5cb 4444 _mm512_undefined_epi32 (),
756c5857
AI
4445 (__mmask16) -1);
4446}
4447
4448extern __inline __m512i
4449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4451{
4452 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4453 (__v16si) __B,
4454 (__v16si) __W,
4455 (__mmask16) __U);
4456}
4457
4458extern __inline __m512i
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4461{
4462 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4463 (__v16si) __B,
4464 (__v16si)
4465 _mm512_setzero_si512 (),
4466 (__mmask16) __U);
4467}
4468
4469extern __inline __m512i
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4472{
4473 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4474 (__v16si) __B,
4475 (__v16si)
4271e5cb 4476 _mm512_undefined_epi32 (),
756c5857
AI
4477 (__mmask16) -1);
4478}
4479
4480extern __inline __m512i
4481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4483{
4484 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4485 (__v16si) __B,
4486 (__v16si) __W,
4487 (__mmask16) __U);
4488}
4489
4490extern __inline __m512i
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4493{
4494 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4495 (__v16si) __B,
4496 (__v16si)
4497 _mm512_setzero_si512 (),
4498 (__mmask16) __U);
4499}
4500
4501extern __inline __m512i
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4504{
4505 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4506 (__v8di) __B,
4507 (__v8di)
4271e5cb 4508 _mm512_undefined_epi32 (),
756c5857
AI
4509 (__mmask8) -1);
4510}
4511
4512extern __inline __m512i
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4515{
4516 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4517 (__v8di) __B,
4518 (__v8di) __W,
4519 (__mmask8) __U);
4520}
4521
4522extern __inline __m512i
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4525{
4526 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4527 (__v8di) __B,
4528 (__v8di)
4529 _mm512_setzero_si512 (),
4530 (__mmask8) __U);
4531}
4532
4533extern __inline __m512i
4534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4535_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4536{
4537 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4538 (__v8di) __B,
4539 (__v8di)
4271e5cb 4540 _mm512_undefined_epi32 (),
756c5857
AI
4541 (__mmask8) -1);
4542}
4543
4544extern __inline __m512i
4545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4546_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4547{
4548 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4549 (__v8di) __B,
4550 (__v8di) __W,
4551 (__mmask8) __U);
4552}
4553
4554extern __inline __m512i
4555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4557{
4558 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4559 (__v8di) __B,
4560 (__v8di)
4561 _mm512_setzero_si512 (),
4562 (__mmask8) __U);
4563}
4564
4565#ifdef __OPTIMIZE__
4566extern __inline __m256i
4567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4569{
4570 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4571 (__v8si)
0b192937 4572 _mm256_undefined_si256 (),
756c5857
AI
4573 (__mmask8) -1, __R);
4574}
4575
4576extern __inline __m256i
4577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4579 const int __R)
4580{
4581 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4582 (__v8si) __W,
4583 (__mmask8) __U, __R);
4584}
4585
4586extern __inline __m256i
4587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4589{
4590 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4591 (__v8si)
4592 _mm256_setzero_si256 (),
4593 (__mmask8) __U, __R);
4594}
4595
4596extern __inline __m256i
4597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4599{
4600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4601 (__v8si)
0b192937 4602 _mm256_undefined_si256 (),
756c5857
AI
4603 (__mmask8) -1, __R);
4604}
4605
4606extern __inline __m256i
4607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4609 const int __R)
4610{
4611 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4612 (__v8si) __W,
4613 (__mmask8) __U, __R);
4614}
4615
4616extern __inline __m256i
4617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4619{
4620 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4621 (__v8si)
4622 _mm256_setzero_si256 (),
4623 (__mmask8) __U, __R);
4624}
4625#else
/* Macro forms of the cvtt_roundpd intrinsics, used when __OPTIMIZE__ is
   not defined.  A is the source vector, B the immediate passed as the
   builtin's last argument, W the vector passed in the second slot and U
   the mask.  Unlike the inline functions, A, U and B are passed through
   without casts.  */
#define _mm512_cvtt_roundpd_epi32(A, B)              \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B)   \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B)     \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B)              \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B)   \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B)     \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4643#endif
4644
4645#ifdef __OPTIMIZE__
4646extern __inline __m256i
4647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4649{
4650 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4651 (__v8si)
0b192937 4652 _mm256_undefined_si256 (),
756c5857
AI
4653 (__mmask8) -1, __R);
4654}
4655
4656extern __inline __m256i
4657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4659 const int __R)
4660{
4661 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4662 (__v8si) __W,
4663 (__mmask8) __U, __R);
4664}
4665
4666extern __inline __m256i
4667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4669{
4670 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4671 (__v8si)
4672 _mm256_setzero_si256 (),
4673 (__mmask8) __U, __R);
4674}
4675
4676extern __inline __m256i
4677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4679{
4680 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4681 (__v8si)
0b192937 4682 _mm256_undefined_si256 (),
756c5857
AI
4683 (__mmask8) -1, __R);
4684}
4685
4686extern __inline __m256i
4687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4689 const int __R)
4690{
4691 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4692 (__v8si) __W,
4693 (__mmask8) __U, __R);
4694}
4695
4696extern __inline __m256i
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4699{
4700 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4701 (__v8si)
4702 _mm256_setzero_si256 (),
4703 (__mmask8) __U, __R);
4704}
4705#else
/* Macro form of _mm512_cvt_roundpd_epi32, used when __OPTIMIZE__ is not
   defined: all-ones mask (-1), undefined pass-through, B forwarded as
   written.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A), \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))
756c5857
AI
4708
/* Macro form of _mm512_mask_cvt_roundpd_epi32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A), (__v8si) (W), \
     (U), (B)))
4711
/* Macro form of _mm512_maskz_cvt_roundpd_epi32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ((A), \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))
4714
/* Macro form of _mm512_cvt_roundpd_epu32 (no __OPTIMIZE__): all-ones mask
   (-1) and undefined pass-through operand.  */
#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A), \
     (__v8si) _mm256_undefined_si256 (), -1, (B)))
756c5857
AI
4717
/* Macro form of _mm512_mask_cvt_roundpd_epu32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A), (__v8si) (W), \
     (U), (B)))
4720
/* Macro form of _mm512_maskz_cvt_roundpd_epu32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ((A), \
     (__v8si) _mm256_setzero_si256 (), (U), (B)))
4723#endif
4724
4725#ifdef __OPTIMIZE__
4726extern __inline __m512i
4727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4729{
4730 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4731 (__v16si)
4271e5cb 4732 _mm512_undefined_epi32 (),
756c5857
AI
4733 (__mmask16) -1, __R);
4734}
4735
4736extern __inline __m512i
4737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4738_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4739 const int __R)
4740{
4741 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4742 (__v16si) __W,
4743 (__mmask16) __U, __R);
4744}
4745
4746extern __inline __m512i
4747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4749{
4750 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4751 (__v16si)
4752 _mm512_setzero_si512 (),
4753 (__mmask16) __U, __R);
4754}
4755
4756extern __inline __m512i
4757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4758_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4759{
4760 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4761 (__v16si)
4271e5cb 4762 _mm512_undefined_epi32 (),
756c5857
AI
4763 (__mmask16) -1, __R);
4764}
4765
4766extern __inline __m512i
4767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4769 const int __R)
4770{
4771 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4772 (__v16si) __W,
4773 (__mmask16) __U, __R);
4774}
4775
4776extern __inline __m512i
4777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4779{
4780 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4781 (__v16si)
4782 _mm512_setzero_si512 (),
4783 (__mmask16) __U, __R);
4784}
4785#else
/* Macro form of _mm512_cvtt_roundps_epi32 (no __OPTIMIZE__): all-ones mask
   (-1) and undefined pass-through operand.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A), \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))
756c5857
AI
4788
/* Macro form of _mm512_mask_cvtt_roundps_epi32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A), (__v16si) (W), \
     (U), (B)))
4791
/* Macro form of _mm512_maskz_cvtt_roundps_epi32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ((A), \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4794
/* Macro form of _mm512_cvtt_roundps_epu32 (no __OPTIMIZE__): all-ones mask
   (-1) and undefined pass-through operand.  */
#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A), \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))
756c5857
AI
4797
/* Macro form of _mm512_mask_cvtt_roundps_epu32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A), (__v16si) (W), \
     (U), (B)))
4800
/* Macro form of _mm512_maskz_cvtt_roundps_epu32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ((A), \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4803#endif
4804
4805#ifdef __OPTIMIZE__
4806extern __inline __m512i
4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4809{
4810 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4811 (__v16si)
4271e5cb 4812 _mm512_undefined_epi32 (),
756c5857
AI
4813 (__mmask16) -1, __R);
4814}
4815
4816extern __inline __m512i
4817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4819 const int __R)
4820{
4821 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4822 (__v16si) __W,
4823 (__mmask16) __U, __R);
4824}
4825
4826extern __inline __m512i
4827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4828_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4829{
4830 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4831 (__v16si)
4832 _mm512_setzero_si512 (),
4833 (__mmask16) __U, __R);
4834}
4835
4836extern __inline __m512i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4839{
4840 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4841 (__v16si)
4271e5cb 4842 _mm512_undefined_epi32 (),
756c5857
AI
4843 (__mmask16) -1, __R);
4844}
4845
4846extern __inline __m512i
4847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4849 const int __R)
4850{
4851 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4852 (__v16si) __W,
4853 (__mmask16) __U, __R);
4854}
4855
4856extern __inline __m512i
4857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4859{
4860 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4861 (__v16si)
4862 _mm512_setzero_si512 (),
4863 (__mmask16) __U, __R);
4864}
4865#else
/* Macro form of _mm512_cvt_roundps_epi32 (no __OPTIMIZE__): all-ones mask
   (-1) and undefined pass-through operand.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A), \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))
756c5857
AI
4868
/* Macro form of _mm512_mask_cvt_roundps_epi32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A), (__v16si) (W), \
     (U), (B)))
4871
/* Macro form of _mm512_maskz_cvt_roundps_epi32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ((A), \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4874
/* Macro form of _mm512_cvt_roundps_epu32 (no __OPTIMIZE__): all-ones mask
   (-1) and undefined pass-through operand.  */
#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A), \
     (__v16si) _mm512_undefined_epi32 (), -1, (B)))
756c5857
AI
4877
/* Macro form of _mm512_mask_cvt_roundps_epu32 (no __OPTIMIZE__): W is the
   pass-through operand, U the write mask.  */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A), (__v16si) (W), \
     (U), (B)))
4880
/* Macro form of _mm512_maskz_cvt_roundps_epu32 (no __OPTIMIZE__): zero
   vector as pass-through operand, U as write mask.  */
#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ((A), \
     (__v16si) _mm512_setzero_si512 (), (U), (B)))
4883#endif
4884
4885extern __inline __m128d
4886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4887_mm_cvtu32_sd (__m128d __A, unsigned __B)
4888{
4889 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4890}
4891
4892#ifdef __x86_64__
4893#ifdef __OPTIMIZE__
4894extern __inline __m128d
4895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4897{
4898 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4899}
4900
4901extern __inline __m128d
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4904{
4905 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4906}
4907
4908extern __inline __m128d
4909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4910_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4911{
4912 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4913}
4914#else
/* Macro form of _mm_cvt_roundu64_sd for builds without __OPTIMIZE__.
   The expansion is fully parenthesized so the __m128d cast applies before
   any operator the caller appends (a postfix subscript would otherwise bind
   to the builtin call), and A receives the same __v2df cast as in the
   inline definition.  C is forwarded as written.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) (A), (B), (C)))
4917
/* Macro form of _mm_cvt_roundi64_sd for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128d cast applies before any operator the
   caller appends; A is cast to __v2df as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (A), (B), (C)))
4920
/* Macro form of _mm_cvt_roundsi64_sd for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128d cast applies before any operator the
   caller appends; A is cast to __v2df as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) (A), (B), (C)))
4923#endif
4924
4925#endif
4926
4927#ifdef __OPTIMIZE__
4928extern __inline __m128
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4931{
4932 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4933}
4934
4935extern __inline __m128
4936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4937_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4938{
4939 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4940}
4941
4942extern __inline __m128
4943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4945{
4946 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4947}
4948#else
/* Macro form of _mm_cvt_roundu32_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) (A), (B), (C)))
4951
/* Macro form of _mm_cvt_roundi32_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (A), (B), (C)))
4954
/* Macro form of _mm_cvt_roundsi32_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) (A), (B), (C)))
4957#endif
4958
4959#ifdef __x86_64__
4960#ifdef __OPTIMIZE__
4961extern __inline __m128
4962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4964{
4965 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4966}
4967
4968extern __inline __m128
4969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4971{
4972 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4973}
4974
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4978{
4979 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4980}
4981#else
/* Macro form of _mm_cvt_roundu64_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) (A), (B), (C)))
4984
/* Macro form of _mm_cvt_roundi64_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (A), (B), (C)))
4987
/* Macro form of _mm_cvt_roundsi64_ss for builds without __OPTIMIZE__.
   Fully parenthesized so the __m128 cast applies before any operator the
   caller appends; A is cast to __v4sf as in the inline definition.  C is
   forwarded as written.  */
#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) (A), (B), (C)))
4990#endif
4991
4992#endif
4993
4994extern __inline __m128i
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm512_cvtepi32_epi8 (__m512i __A)
4997{
0b192937
UD
4998 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4999 (__v16qi)
5000 _mm_undefined_si128 (),
756c5857
AI
5001 (__mmask16) -1);
5002}
5003
d256b866
IT
5004extern __inline void
5005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5007{
5008 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5009}
5010
756c5857
AI
5011extern __inline __m128i
5012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5014{
5015 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5016 (__v16qi) __O, __M);
5017}
5018
5019extern __inline __m128i
5020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5022{
5023 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5024 (__v16qi)
5025 _mm_setzero_si128 (),
5026 __M);
5027}
5028
5029extern __inline __m128i
5030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5031_mm512_cvtsepi32_epi8 (__m512i __A)
5032{
0b192937
UD
5033 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5034 (__v16qi)
5035 _mm_undefined_si128 (),
756c5857
AI
5036 (__mmask16) -1);
5037}
5038
d256b866
IT
5039extern __inline void
5040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5042{
5043 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5044}
5045
756c5857
AI
5046extern __inline __m128i
5047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5049{
5050 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5051 (__v16qi) __O, __M);
5052}
5053
5054extern __inline __m128i
5055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5057{
5058 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5059 (__v16qi)
5060 _mm_setzero_si128 (),
5061 __M);
5062}
5063
5064extern __inline __m128i
5065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5066_mm512_cvtusepi32_epi8 (__m512i __A)
5067{
0b192937
UD
5068 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5069 (__v16qi)
5070 _mm_undefined_si128 (),
756c5857
AI
5071 (__mmask16) -1);
5072}
5073
d256b866
IT
5074extern __inline void
5075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5077{
5078 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5079}
5080
756c5857
AI
5081extern __inline __m128i
5082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5084{
5085 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5086 (__v16qi) __O,
5087 __M);
5088}
5089
5090extern __inline __m128i
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5093{
5094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5095 (__v16qi)
5096 _mm_setzero_si128 (),
5097 __M);
5098}
5099
5100extern __inline __m256i
5101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102_mm512_cvtepi32_epi16 (__m512i __A)
5103{
0b192937
UD
5104 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5105 (__v16hi)
5106 _mm256_undefined_si256 (),
756c5857
AI
5107 (__mmask16) -1);
5108}
5109
d256b866
IT
5110extern __inline void
5111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5112_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5113{
5114 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5115}
5116
756c5857
AI
5117extern __inline __m256i
5118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5119_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5120{
5121 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5122 (__v16hi) __O, __M);
5123}
5124
5125extern __inline __m256i
5126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5127_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5128{
5129 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5130 (__v16hi)
5131 _mm256_setzero_si256 (),
5132 __M);
5133}
5134
5135extern __inline __m256i
5136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137_mm512_cvtsepi32_epi16 (__m512i __A)
5138{
0b192937
UD
5139 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5140 (__v16hi)
5141 _mm256_undefined_si256 (),
756c5857
AI
5142 (__mmask16) -1);
5143}
5144
d256b866
IT
5145extern __inline void
5146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5147_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5148{
5149 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5150}
5151
756c5857
AI
5152extern __inline __m256i
5153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5155{
5156 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5157 (__v16hi) __O, __M);
5158}
5159
5160extern __inline __m256i
5161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5163{
5164 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5165 (__v16hi)
5166 _mm256_setzero_si256 (),
5167 __M);
5168}
5169
5170extern __inline __m256i
5171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5172_mm512_cvtusepi32_epi16 (__m512i __A)
5173{
0b192937
UD
5174 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5175 (__v16hi)
5176 _mm256_undefined_si256 (),
756c5857
AI
5177 (__mmask16) -1);
5178}
5179
d256b866
IT
5180extern __inline void
5181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5183{
5184 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5185}
5186
756c5857
AI
5187extern __inline __m256i
5188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5190{
5191 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5192 (__v16hi) __O,
5193 __M);
5194}
5195
5196extern __inline __m256i
5197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5199{
5200 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5201 (__v16hi)
5202 _mm256_setzero_si256 (),
5203 __M);
5204}
5205
5206extern __inline __m256i
5207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208_mm512_cvtepi64_epi32 (__m512i __A)
5209{
0b192937
UD
5210 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5211 (__v8si)
5212 _mm256_undefined_si256 (),
756c5857
AI
5213 (__mmask8) -1);
5214}
5215
d256b866
IT
5216extern __inline void
5217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5219{
5220 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5221}
5222
756c5857
AI
5223extern __inline __m256i
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5226{
5227 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5228 (__v8si) __O, __M);
5229}
5230
5231extern __inline __m256i
5232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5234{
5235 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5236 (__v8si)
5237 _mm256_setzero_si256 (),
5238 __M);
5239}
5240
5241extern __inline __m256i
5242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243_mm512_cvtsepi64_epi32 (__m512i __A)
5244{
0b192937
UD
5245 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5246 (__v8si)
5247 _mm256_undefined_si256 (),
756c5857
AI
5248 (__mmask8) -1);
5249}
5250
d256b866
IT
5251extern __inline void
5252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5254{
5255 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5256}
5257
756c5857
AI
5258extern __inline __m256i
5259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5260_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5261{
5262 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5263 (__v8si) __O, __M);
5264}
5265
5266extern __inline __m256i
5267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5268_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5269{
5270 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5271 (__v8si)
5272 _mm256_setzero_si256 (),
5273 __M);
5274}
5275
5276extern __inline __m256i
5277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278_mm512_cvtusepi64_epi32 (__m512i __A)
5279{
0b192937
UD
5280 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5281 (__v8si)
5282 _mm256_undefined_si256 (),
756c5857
AI
5283 (__mmask8) -1);
5284}
5285
6fb82517 5286extern __inline void
d256b866
IT
5287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5289{
5290 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5291}
5292
756c5857
AI
5293extern __inline __m256i
5294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5296{
5297 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5298 (__v8si) __O, __M);
5299}
5300
5301extern __inline __m256i
5302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5304{
5305 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5306 (__v8si)
5307 _mm256_setzero_si256 (),
5308 __M);
5309}
5310
5311extern __inline __m128i
5312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313_mm512_cvtepi64_epi16 (__m512i __A)
5314{
0b192937
UD
5315 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5316 (__v8hi)
5317 _mm_undefined_si128 (),
756c5857
AI
5318 (__mmask8) -1);
5319}
5320
d256b866
IT
5321extern __inline void
5322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5324{
5325 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5326}
5327
756c5857
AI
5328extern __inline __m128i
5329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5330_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5331{
5332 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5333 (__v8hi) __O, __M);
5334}
5335
5336extern __inline __m128i
5337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5339{
5340 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5341 (__v8hi)
5342 _mm_setzero_si128 (),
5343 __M);
5344}
5345
5346extern __inline __m128i
5347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348_mm512_cvtsepi64_epi16 (__m512i __A)
5349{
0b192937
UD
5350 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5351 (__v8hi)
5352 _mm_undefined_si128 (),
756c5857
AI
5353 (__mmask8) -1);
5354}
5355
d256b866
IT
5356extern __inline void
5357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5359{
5360 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5361}
5362
756c5857
AI
5363extern __inline __m128i
5364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5365_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5366{
5367 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5368 (__v8hi) __O, __M);
5369}
5370
5371extern __inline __m128i
5372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5374{
5375 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5376 (__v8hi)
5377 _mm_setzero_si128 (),
5378 __M);
5379}
5380
5381extern __inline __m128i
5382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383_mm512_cvtusepi64_epi16 (__m512i __A)
5384{
0b192937
UD
5385 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5386 (__v8hi)
5387 _mm_undefined_si128 (),
756c5857
AI
5388 (__mmask8) -1);
5389}
5390
d256b866
IT
5391extern __inline void
5392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5394{
5395 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5396}
5397
756c5857
AI
5398extern __inline __m128i
5399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5401{
5402 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5403 (__v8hi) __O, __M);
5404}
5405
5406extern __inline __m128i
5407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5408_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5409{
5410 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5411 (__v8hi)
5412 _mm_setzero_si128 (),
5413 __M);
5414}
5415
5416extern __inline __m128i
5417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418_mm512_cvtepi64_epi8 (__m512i __A)
5419{
0b192937
UD
5420 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5421 (__v16qi)
5422 _mm_undefined_si128 (),
756c5857
AI
5423 (__mmask8) -1);
5424}
5425
d256b866
IT
5426extern __inline void
5427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5428_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5429{
5430 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5431}
5432
756c5857
AI
5433extern __inline __m128i
5434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5436{
5437 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5438 (__v16qi) __O, __M);
5439}
5440
5441extern __inline __m128i
5442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5444{
5445 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5446 (__v16qi)
5447 _mm_setzero_si128 (),
5448 __M);
5449}
5450
5451extern __inline __m128i
5452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453_mm512_cvtsepi64_epi8 (__m512i __A)
5454{
0b192937
UD
5455 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5456 (__v16qi)
5457 _mm_undefined_si128 (),
756c5857
AI
5458 (__mmask8) -1);
5459}
5460
d256b866
IT
5461extern __inline void
5462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5463_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5464{
5465 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5466}
5467
756c5857
AI
5468extern __inline __m128i
5469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5470_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5471{
5472 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5473 (__v16qi) __O, __M);
5474}
5475
5476extern __inline __m128i
5477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5479{
5480 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5481 (__v16qi)
5482 _mm_setzero_si128 (),
5483 __M);
5484}
5485
5486extern __inline __m128i
5487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488_mm512_cvtusepi64_epi8 (__m512i __A)
5489{
0b192937
UD
5490 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5491 (__v16qi)
5492 _mm_undefined_si128 (),
756c5857
AI
5493 (__mmask8) -1);
5494}
5495
d256b866
IT
5496extern __inline void
5497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5499{
5500 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5501}
5502
756c5857
AI
5503extern __inline __m128i
5504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5505_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5506{
5507 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5508 (__v16qi) __O,
5509 __M);
5510}
5511
5512extern __inline __m128i
5513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5514_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5515{
5516 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5517 (__v16qi)
5518 _mm_setzero_si128 (),
5519 __M);
5520}
5521
5522extern __inline __m512d
5523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524_mm512_cvtepi32_pd (__m256i __A)
5525{
5526 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5527 (__v8df)
0b192937 5528 _mm512_undefined_pd (),
756c5857
AI
5529 (__mmask8) -1);
5530}
5531
5532extern __inline __m512d
5533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5535{
5536 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5537 (__v8df) __W,
5538 (__mmask8) __U);
5539}
5540
5541extern __inline __m512d
5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5544{
5545 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5546 (__v8df)
5547 _mm512_setzero_pd (),
5548 (__mmask8) __U);
5549}
5550
5551extern __inline __m512d
5552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553_mm512_cvtepu32_pd (__m256i __A)
5554{
5555 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5556 (__v8df)
0b192937 5557 _mm512_undefined_pd (),
756c5857
AI
5558 (__mmask8) -1);
5559}
5560
5561extern __inline __m512d
5562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5564{
5565 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5566 (__v8df) __W,
5567 (__mmask8) __U);
5568}
5569
5570extern __inline __m512d
5571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5572_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5573{
5574 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5575 (__v8df)
5576 _mm512_setzero_pd (),
5577 (__mmask8) __U);
5578}
5579
5580#ifdef __OPTIMIZE__
5581extern __inline __m512
5582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5583_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5584{
5585 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5586 (__v16sf)
0b192937 5587 _mm512_undefined_ps (),
756c5857
AI
5588 (__mmask16) -1, __R);
5589}
5590
5591extern __inline __m512
5592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5594 const int __R)
5595{
5596 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5597 (__v16sf) __W,
5598 (__mmask16) __U, __R);
5599}
5600
5601extern __inline __m512
5602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5604{
5605 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5606 (__v16sf)
5607 _mm512_setzero_ps (),
5608 (__mmask16) __U, __R);
5609}
5610
5611extern __inline __m512
5612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5614{
5615 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5616 (__v16sf)
0b192937 5617 _mm512_undefined_ps (),
756c5857
AI
5618 (__mmask16) -1, __R);
5619}
5620
5621extern __inline __m512
5622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5623_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5624 const int __R)
5625{
5626 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5627 (__v16sf) __W,
5628 (__mmask16) __U, __R);
5629}
5630
5631extern __inline __m512
5632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5633_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5634{
5635 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5636 (__v16sf)
5637 _mm512_setzero_ps (),
5638 (__mmask16) __U, __R);
5639}
5640
5641#else
/* Macro counterpart of the _mm512_cvt_roundepi32_ps inline function, used
   when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses so it is a single primary expression, and every argument is
   parenthesized and cast, matching the other macro families in this
   header.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (int) (B)))
756c5857
AI
5644
/* Macro counterpart of the _mm512_mask_cvt_roundepi32_ps inline function,
   used when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses and W, U and B are parenthesized and cast to the types the
   inline function uses, so expression arguments cannot be mis-parsed.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
      (__v16sf) (W), (__mmask16) (U), (int) (B)))
5647
/* Macro counterpart of the _mm512_maskz_cvt_roundepi32_ps inline function,
   used when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses and U and B are parenthesized and cast, so expression
   arguments cannot be mis-parsed.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (int) (B)))
5650
/* Macro counterpart of the _mm512_cvt_roundepu32_ps inline function, used
   when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses so it is a single primary expression, and every argument is
   parenthesized and cast, matching the other macro families in this
   header.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (int) (B)))
756c5857
AI
5653
/* Macro counterpart of the _mm512_mask_cvt_roundepu32_ps inline function,
   used when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses and W, U and B are parenthesized and cast to the types the
   inline function uses, so expression arguments cannot be mis-parsed.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
      (__v16sf) (W), (__mmask16) (U), (int) (B)))
5656
/* Macro counterpart of the _mm512_maskz_cvt_roundepu32_ps inline function,
   used when __OPTIMIZE__ is not defined.  The expansion is wrapped in
   parentheses and U and B are parenthesized and cast, so expression
   arguments cannot be mis-parsed.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (int) (B)))
5659#endif
5660
5661#ifdef __OPTIMIZE__
5662extern __inline __m256d
5663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5665{
5666 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5667 __imm,
5668 (__v4df)
0b192937 5669 _mm256_undefined_pd (),
756c5857
AI
5670 (__mmask8) -1);
5671}
5672
5673extern __inline __m256d
5674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5675_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5676 const int __imm)
5677{
5678 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5679 __imm,
5680 (__v4df) __W,
5681 (__mmask8) __U);
5682}
5683
5684extern __inline __m256d
5685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5686_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5687{
5688 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5689 __imm,
5690 (__v4df)
5691 _mm256_setzero_pd (),
5692 (__mmask8) __U);
5693}
5694
5695extern __inline __m128
5696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5697_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5698{
5699 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5700 __imm,
5701 (__v4sf)
0b192937 5702 _mm_undefined_ps (),
756c5857
AI
5703 (__mmask8) -1);
5704}
5705
5706extern __inline __m128
5707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5708_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5709 const int __imm)
5710{
5711 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5712 __imm,
5713 (__v4sf) __W,
5714 (__mmask8) __U);
5715}
5716
5717extern __inline __m128
5718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5719_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5720{
5721 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5722 __imm,
5723 (__v4sf)
5724 _mm_setzero_ps (),
5725 (__mmask8) __U);
5726}
5727
5728extern __inline __m256i
5729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5731{
5732 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5733 __imm,
5734 (__v4di)
0b192937 5735 _mm256_undefined_si256 (),
756c5857
AI
5736 (__mmask8) -1);
5737}
5738
5739extern __inline __m256i
5740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5741_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5742 const int __imm)
5743{
5744 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5745 __imm,
5746 (__v4di) __W,
5747 (__mmask8) __U);
5748}
5749
5750extern __inline __m256i
5751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5753{
5754 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5755 __imm,
5756 (__v4di)
5757 _mm256_setzero_si256 (),
5758 (__mmask8) __U);
5759}
5760
5761extern __inline __m128i
5762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5764{
5765 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5766 __imm,
5767 (__v4si)
0b192937 5768 _mm_undefined_si128 (),
756c5857
AI
5769 (__mmask8) -1);
5770}
5771
5772extern __inline __m128i
5773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5775 const int __imm)
5776{
5777 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5778 __imm,
5779 (__v4si) __W,
5780 (__mmask8) __U);
5781}
5782
5783extern __inline __m128i
5784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5785_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5786{
5787 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5788 __imm,
5789 (__v4si)
5790 _mm_setzero_si128 (),
5791 (__mmask8) __U);
5792}
5793#else
5794
/* Macro counterpart of the _mm512_extractf64x4_pd inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_extractf64x4_pd(V, K) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), \
                                     (int) (K), \
                                     (__v4df)(__m256d) \
                                       _mm256_undefined_pd (), \
                                     (__mmask8) -1))
5800
/* Macro counterpart of the _mm512_mask_extractf64x4_pd inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_extractf64x4_pd(P, M, V, K) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), \
                                     (int) (K), \
                                     (__v4df)(__m256d) (P), \
                                     (__mmask8) (M)))
5806
/* Macro counterpart of the _mm512_maskz_extractf64x4_pd inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_extractf64x4_pd(M, V, K) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), \
                                     (int) (K), \
                                     (__v4df)(__m256d) \
                                       _mm256_setzero_pd (), \
                                     (__mmask8) (M)))
5812
/* Macro counterpart of the _mm512_extractf32x4_ps inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_extractf32x4_ps(V, K) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), \
                                     (int) (K), \
                                     (__v4sf)(__m128) _mm_undefined_ps (), \
                                     (__mmask8) -1))
5818
/* Macro counterpart of the _mm512_mask_extractf32x4_ps inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_extractf32x4_ps(P, M, V, K) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), \
                                     (int) (K), \
                                     (__v4sf)(__m128) (P), \
                                     (__mmask8) (M)))
5824
/* Macro counterpart of the _mm512_maskz_extractf32x4_ps inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_extractf32x4_ps(M, V, K) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), \
                                     (int) (K), \
                                     (__v4sf)(__m128) _mm_setzero_ps (), \
                                     (__mmask8) (M)))
5830
/* Macro counterpart of the _mm512_extracti64x4_epi64 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_extracti64x4_epi64(V, K) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4di)(__m256i) \
                                       _mm256_undefined_si256 (), \
                                     (__mmask8) -1))
5836
/* Macro counterpart of the _mm512_mask_extracti64x4_epi64 inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_extracti64x4_epi64(P, M, V, K) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4di)(__m256i) (P), \
                                     (__mmask8) (M)))
5842
/* Macro counterpart of the _mm512_maskz_extracti64x4_epi64 inline
   function, used when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_extracti64x4_epi64(M, V, K) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4di)(__m256i) \
                                       _mm256_setzero_si256 (), \
                                     (__mmask8) (M)))
5848
/* Macro counterpart of the _mm512_extracti32x4_epi32 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_extracti32x4_epi32(V, K) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4si)(__m128i) \
                                       _mm_undefined_si128 (), \
                                     (__mmask8) -1))
5854
/* Macro counterpart of the _mm512_mask_extracti32x4_epi32 inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_extracti32x4_epi32(P, M, V, K) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4si)(__m128i) (P), \
                                     (__mmask8) (M)))
5860
/* Macro counterpart of the _mm512_maskz_extracti32x4_epi32 inline
   function, used when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_extracti32x4_epi32(M, V, K) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), \
                                     (int) (K), \
                                     (__v4si)(__m128i) \
                                       _mm_setzero_si128 (), \
                                     (__mmask8) (M)))
5866#endif
5867
5868#ifdef __OPTIMIZE__
5869extern __inline __m512i
5870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5871_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5872{
5873 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5874 (__v4si) __B,
5875 __imm,
5876 (__v16si) __A, -1);
5877}
5878
5879extern __inline __m512
5880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5881_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5882{
5883 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5884 (__v4sf) __B,
5885 __imm,
5886 (__v16sf) __A, -1);
5887}
5888
5889extern __inline __m512i
5890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5891_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5892{
5893 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5894 (__v4di) __B,
5895 __imm,
5896 (__v8di)
4271e5cb 5897 _mm512_undefined_epi32 (),
756c5857
AI
5898 (__mmask8) -1);
5899}
5900
5901extern __inline __m512i
5902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5903_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5904 __m256i __B, const int __imm)
5905{
5906 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5907 (__v4di) __B,
5908 __imm,
5909 (__v8di) __W,
5910 (__mmask8) __U);
5911}
5912
5913extern __inline __m512i
5914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5915_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5916 const int __imm)
5917{
5918 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5919 (__v4di) __B,
5920 __imm,
5921 (__v8di)
5922 _mm512_setzero_si512 (),
5923 (__mmask8) __U);
5924}
5925
5926extern __inline __m512d
5927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5928_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5929{
5930 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5931 (__v4df) __B,
5932 __imm,
5933 (__v8df)
0b192937 5934 _mm512_undefined_pd (),
756c5857
AI
5935 (__mmask8) -1);
5936}
5937
5938extern __inline __m512d
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5941 __m256d __B, const int __imm)
5942{
5943 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5944 (__v4df) __B,
5945 __imm,
5946 (__v8df) __W,
5947 (__mmask8) __U);
5948}
5949
5950extern __inline __m512d
5951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5953 const int __imm)
5954{
5955 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5956 (__v4df) __B,
5957 __imm,
5958 (__v8df)
5959 _mm512_setzero_pd (),
5960 (__mmask8) __U);
5961}
5962#else
/* Macro counterpart of the _mm512_insertf32x4 inline function, used when
   __OPTIMIZE__ is not defined.  X is expanded exactly once, so an argument
   with side effects is not evaluated twice.  The fourth operand is an
   _mm512_undefined_ps () vector paired with an all-ones mask, the same
   arrangement the _mm512_insertf64x4 macro uses.
   NOTE(review): relies on the fourth operand being unused under an
   all-ones mask -- confirm against the builtin's expander.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))
5966
/* Macro counterpart of the _mm512_inserti32x4 inline function, used when
   __OPTIMIZE__ is not defined.  X is expanded exactly once, so an argument
   with side effects is not evaluated twice.  The fourth operand is an
   _mm512_undefined_epi32 () vector paired with an all-ones mask, the same
   arrangement the _mm512_inserti64x4 macro uses.
   NOTE(review): relies on the fourth operand being unused under an
   all-ones mask -- confirm against the builtin's expander.  */
#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i) _mm512_undefined_epi32 (), (__mmask16)(-1)))
5970
/* Macro counterpart of the _mm512_insertf64x4 inline function, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_insertf64x4(V, Q, K) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
                                    (__v4df)(__m256d) (Q), \
                                    (int) (K), \
                                    (__v8df)(__m512d) \
                                      _mm512_undefined_pd (), \
                                    (__mmask8) -1))
5976
/* Macro counterpart of the _mm512_mask_insertf64x4 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_insertf64x4(P, M, V, Q, K) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
                                    (__v4df)(__m256d) (Q), \
                                    (int) (K), \
                                    (__v8df)(__m512d) (P), \
                                    (__mmask8) (M)))
5982
/* Macro counterpart of the _mm512_maskz_insertf64x4 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_insertf64x4(M, V, Q, K) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
                                    (__v4df)(__m256d) (Q), \
                                    (int) (K), \
                                    (__v8df)(__m512d) \
                                      _mm512_setzero_pd (), \
                                    (__mmask8) (M)))
5988
/* Macro counterpart of the _mm512_inserti64x4 inline function, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_inserti64x4(V, Q, K) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
                                    (__v4di)(__m256i) (Q), \
                                    (int) (K), \
                                    (__v8di)(__m512i) \
                                      _mm512_undefined_epi32 (), \
                                    (__mmask8) -1))
5994
/* Macro counterpart of the _mm512_mask_inserti64x4 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_inserti64x4(P, M, V, Q, K) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
                                    (__v4di)(__m256i) (Q), \
                                    (int) (K), \
                                    (__v8di)(__m512i) (P), \
                                    (__mmask8) (M)))
6000
/* Macro counterpart of the _mm512_maskz_inserti64x4 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_inserti64x4(M, V, Q, K) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
                                    (__v4di)(__m256i) (Q), \
                                    (int) (K), \
                                    (__v8di)(__m512i) \
                                      _mm512_setzero_si512 (), \
                                    (__mmask8) (M)))
6006#endif
6007
6008extern __inline __m512d
6009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010_mm512_loadu_pd (void const *__P)
6011{
c6b0037d 6012 return *(__m512d_u *)__P;
756c5857
AI
6013}
6014
6015extern __inline __m512d
6016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6017_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6018{
fc9cf6da 6019 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6020 (__v8df) __W,
6021 (__mmask8) __U);
6022}
6023
6024extern __inline __m512d
6025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6026_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6027{
fc9cf6da 6028 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6029 (__v8df)
6030 _mm512_setzero_pd (),
6031 (__mmask8) __U);
6032}
6033
6034extern __inline void
6035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6036_mm512_storeu_pd (void *__P, __m512d __A)
6037{
c6b0037d 6038 *(__m512d_u *)__P = __A;
756c5857
AI
6039}
6040
6041extern __inline void
6042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6043_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6044{
fc9cf6da 6045 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
6046 (__mmask8) __U);
6047}
6048
6049extern __inline __m512
6050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051_mm512_loadu_ps (void const *__P)
6052{
c6b0037d 6053 return *(__m512_u *)__P;
756c5857
AI
6054}
6055
6056extern __inline __m512
6057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6058_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6059{
fc9cf6da 6060 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6061 (__v16sf) __W,
6062 (__mmask16) __U);
6063}
6064
6065extern __inline __m512
6066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6068{
fc9cf6da 6069 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6070 (__v16sf)
6071 _mm512_setzero_ps (),
6072 (__mmask16) __U);
6073}
6074
6075extern __inline void
6076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077_mm512_storeu_ps (void *__P, __m512 __A)
6078{
c6b0037d 6079 *(__m512_u *)__P = __A;
756c5857
AI
6080}
6081
6082extern __inline void
6083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6085{
fc9cf6da 6086 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
6087 (__mmask16) __U);
6088}
6089
6090extern __inline __m512i
6091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6093{
fc9cf6da 6094 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6095 (__v8di) __W,
6096 (__mmask8) __U);
6097}
6098
6099extern __inline __m512i
6100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6102{
fc9cf6da 6103 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6104 (__v8di)
6105 _mm512_setzero_si512 (),
6106 (__mmask8) __U);
6107}
6108
6109extern __inline void
6110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6112{
fc9cf6da 6113 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
6114 (__mmask8) __U);
6115}
6116
6117extern __inline __m512i
6118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6119_mm512_loadu_si512 (void const *__P)
756c5857 6120{
c6b0037d 6121 return *(__m512i_u *)__P;
756c5857
AI
6122}
6123
6124extern __inline __m512i
6125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6127{
fc9cf6da 6128 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6129 (__v16si) __W,
6130 (__mmask16) __U);
6131}
6132
6133extern __inline __m512i
6134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6136{
fc9cf6da 6137 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6138 (__v16si)
6139 _mm512_setzero_si512 (),
6140 (__mmask16) __U);
6141}
6142
6143extern __inline void
6144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6145_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 6146{
c6b0037d 6147 *(__m512i_u *)__P = __A;
756c5857
AI
6148}
6149
6150extern __inline void
6151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6153{
fc9cf6da 6154 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
6155 (__mmask16) __U);
6156}
6157
6158extern __inline __m512d
6159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160_mm512_permutevar_pd (__m512d __A, __m512i __C)
6161{
6162 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6163 (__v8di) __C,
6164 (__v8df)
0b192937 6165 _mm512_undefined_pd (),
756c5857
AI
6166 (__mmask8) -1);
6167}
6168
6169extern __inline __m512d
6170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6172{
6173 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6174 (__v8di) __C,
6175 (__v8df) __W,
6176 (__mmask8) __U);
6177}
6178
6179extern __inline __m512d
6180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6181_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6182{
6183 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6184 (__v8di) __C,
6185 (__v8df)
6186 _mm512_setzero_pd (),
6187 (__mmask8) __U);
6188}
6189
6190extern __inline __m512
6191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192_mm512_permutevar_ps (__m512 __A, __m512i __C)
6193{
6194 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6195 (__v16si) __C,
6196 (__v16sf)
0b192937 6197 _mm512_undefined_ps (),
756c5857
AI
6198 (__mmask16) -1);
6199}
6200
6201extern __inline __m512
6202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6204{
6205 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6206 (__v16si) __C,
6207 (__v16sf) __W,
6208 (__mmask16) __U);
6209}
6210
6211extern __inline __m512
6212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6214{
6215 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6216 (__v16si) __C,
6217 (__v16sf)
6218 _mm512_setzero_ps (),
6219 (__mmask16) __U);
6220}
6221
6222extern __inline __m512i
6223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6224_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6225{
6226 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6227 /* idx */ ,
6228 (__v8di) __A,
6229 (__v8di) __B,
6230 (__mmask8) -1);
6231}
6232
6233extern __inline __m512i
6234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6235_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6236 __m512i __B)
6237{
6238 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6239 /* idx */ ,
6240 (__v8di) __A,
6241 (__v8di) __B,
6242 (__mmask8) __U);
6243}
6244
6245extern __inline __m512i
6246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6247_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6248 __mmask8 __U, __m512i __B)
6249{
6250 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6251 (__v8di) __I
6252 /* idx */ ,
6253 (__v8di) __B,
6254 (__mmask8) __U);
6255}
6256
6257extern __inline __m512i
6258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6260 __m512i __I, __m512i __B)
6261{
6262 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6263 /* idx */ ,
6264 (__v8di) __A,
6265 (__v8di) __B,
6266 (__mmask8) __U);
6267}
6268
6269extern __inline __m512i
6270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6271_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6272{
6273 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6274 /* idx */ ,
6275 (__v16si) __A,
6276 (__v16si) __B,
6277 (__mmask16) -1);
6278}
6279
6280extern __inline __m512i
6281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6283 __m512i __I, __m512i __B)
6284{
6285 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6286 /* idx */ ,
6287 (__v16si) __A,
6288 (__v16si) __B,
6289 (__mmask16) __U);
6290}
6291
6292extern __inline __m512i
6293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6295 __mmask16 __U, __m512i __B)
6296{
6297 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6298 (__v16si) __I
6299 /* idx */ ,
6300 (__v16si) __B,
6301 (__mmask16) __U);
6302}
6303
6304extern __inline __m512i
6305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6306_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6307 __m512i __I, __m512i __B)
6308{
6309 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6310 /* idx */ ,
6311 (__v16si) __A,
6312 (__v16si) __B,
6313 (__mmask16) __U);
6314}
6315
6316extern __inline __m512d
6317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6319{
6320 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6321 /* idx */ ,
6322 (__v8df) __A,
6323 (__v8df) __B,
6324 (__mmask8) -1);
6325}
6326
6327extern __inline __m512d
6328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6329_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6330 __m512d __B)
6331{
6332 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6333 /* idx */ ,
6334 (__v8df) __A,
6335 (__v8df) __B,
6336 (__mmask8) __U);
6337}
6338
6339extern __inline __m512d
6340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6342 __m512d __B)
6343{
6344 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6345 (__v8di) __I
6346 /* idx */ ,
6347 (__v8df) __B,
6348 (__mmask8) __U);
6349}
6350
6351extern __inline __m512d
6352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6354 __m512d __B)
6355{
6356 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6357 /* idx */ ,
6358 (__v8df) __A,
6359 (__v8df) __B,
6360 (__mmask8) __U);
6361}
6362
6363extern __inline __m512
6364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6365_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6366{
6367 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6368 /* idx */ ,
6369 (__v16sf) __A,
6370 (__v16sf) __B,
6371 (__mmask16) -1);
6372}
6373
6374extern __inline __m512
6375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6377{
6378 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6379 /* idx */ ,
6380 (__v16sf) __A,
6381 (__v16sf) __B,
6382 (__mmask16) __U);
6383}
6384
6385extern __inline __m512
6386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6387_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6388 __m512 __B)
6389{
6390 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6391 (__v16si) __I
6392 /* idx */ ,
6393 (__v16sf) __B,
6394 (__mmask16) __U);
6395}
6396
6397extern __inline __m512
6398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6400 __m512 __B)
6401{
6402 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6403 /* idx */ ,
6404 (__v16sf) __A,
6405 (__v16sf) __B,
6406 (__mmask16) __U);
6407}
6408
6409#ifdef __OPTIMIZE__
6410extern __inline __m512d
6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412_mm512_permute_pd (__m512d __X, const int __C)
6413{
6414 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6415 (__v8df)
0b192937 6416 _mm512_undefined_pd (),
756c5857
AI
6417 (__mmask8) -1);
6418}
6419
6420extern __inline __m512d
6421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6422_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6423{
6424 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6425 (__v8df) __W,
6426 (__mmask8) __U);
6427}
6428
6429extern __inline __m512d
6430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6431_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6432{
6433 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6434 (__v8df)
6435 _mm512_setzero_pd (),
6436 (__mmask8) __U);
6437}
6438
6439extern __inline __m512
6440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441_mm512_permute_ps (__m512 __X, const int __C)
6442{
6443 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6444 (__v16sf)
0b192937 6445 _mm512_undefined_ps (),
756c5857
AI
6446 (__mmask16) -1);
6447}
6448
6449extern __inline __m512
6450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6451_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6452{
6453 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6454 (__v16sf) __W,
6455 (__mmask16) __U);
6456}
6457
6458extern __inline __m512
6459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6461{
6462 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6463 (__v16sf)
6464 _mm512_setzero_ps (),
6465 (__mmask16) __U);
6466}
6467#else
/* Macro counterpart of the _mm512_permute_pd inline function, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_permute_pd(V, K) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(V), \
                                     (int)(K), \
                                     (__v8df)(__m512d) \
                                       _mm512_undefined_pd (), \
                                     (__mmask8)(-1)))
6472
/* Macro counterpart of the _mm512_mask_permute_pd inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_permute_pd(P, M, V, K) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(V), \
                                     (int)(K), \
                                     (__v8df)(__m512d)(P), \
                                     (__mmask8)(M)))
6477
/* Macro counterpart of the _mm512_maskz_permute_pd inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_permute_pd(M, V, K) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(V), \
                                     (int)(K), \
                                     (__v8df)(__m512d) \
                                       _mm512_setzero_pd (), \
                                     (__mmask8)(M)))
6482
/* Macro counterpart of the _mm512_permute_ps inline function, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_permute_ps(V, K) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(V), \
                                     (int)(K), \
                                     (__v16sf)(__m512) \
                                       _mm512_undefined_ps (), \
                                     (__mmask16)(-1)))
6487
/* Macro counterpart of the _mm512_mask_permute_ps inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_permute_ps(P, M, V, K) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(V), \
                                     (int)(K), \
                                     (__v16sf)(__m512)(P), \
                                     (__mmask16)(M)))
6492
/* Macro counterpart of the _mm512_maskz_permute_ps inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_permute_ps(M, V, K) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(V), \
                                     (int)(K), \
                                     (__v16sf)(__m512) \
                                       _mm512_setzero_ps (), \
                                     (__mmask16)(M)))
6497#endif
6498
6499#ifdef __OPTIMIZE__
6500extern __inline __m512i
6501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502_mm512_permutex_epi64 (__m512i __X, const int __I)
6503{
6504 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6505 (__v8di)
4271e5cb 6506 _mm512_undefined_epi32 (),
756c5857
AI
6507 (__mmask8) (-1));
6508}
6509
6510extern __inline __m512i
6511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6512_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6513 __m512i __X, const int __I)
6514{
6515 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6516 (__v8di) __W,
6517 (__mmask8) __M);
6518}
6519
6520extern __inline __m512i
6521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6522_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6523{
6524 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6525 (__v8di)
6526 _mm512_setzero_si512 (),
6527 (__mmask8) __M);
6528}
6529
6530extern __inline __m512d
6531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6532_mm512_permutex_pd (__m512d __X, const int __M)
6533{
6534 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6535 (__v8df)
0b192937 6536 _mm512_undefined_pd (),
756c5857
AI
6537 (__mmask8) -1);
6538}
6539
6540extern __inline __m512d
6541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6543{
6544 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6545 (__v8df) __W,
6546 (__mmask8) __U);
6547}
6548
6549extern __inline __m512d
6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6552{
6553 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6554 (__v8df)
6555 _mm512_setzero_pd (),
6556 (__mmask8) __U);
6557}
6558#else
/* Macro counterpart of the _mm512_permutex_pd inline function, used when
   __OPTIMIZE__ is not defined.  */
#define _mm512_permutex_pd(V, K) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(V), \
                                  (int)(K), \
                                  (__v8df)(__m512d) _mm512_undefined_pd (), \
                                  (__mmask8)-1))
756c5857
AI
6563
/* Macro counterpart of the _mm512_mask_permutex_pd inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_mask_permutex_pd(P, Q, V, K) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(V), \
                                  (int)(K), \
                                  (__v8df)(__m512d)(P), \
                                  (__mmask8)(Q)))
6567
/* Macro counterpart of the _mm512_maskz_permutex_pd inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_permutex_pd(Q, V, K) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(V), \
                                  (int)(K), \
                                  (__v8df)(__m512d) _mm512_setzero_pd (), \
                                  (__mmask8)(Q)))
6572
/* Macro counterpart of the _mm512_permutex_epi64 inline function, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_permutex_epi64(V, K) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(V), \
                                  (int)(K), \
                                  (__v8di)(__m512i) \
                                    (_mm512_undefined_epi32 ()), \
                                  (__mmask8)(-1)))
6579
/* Macro counterpart of the _mm512_maskz_permutex_epi64 inline function,
   used when __OPTIMIZE__ is not defined.  */
#define _mm512_maskz_permutex_epi64(Q, V, K) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(V), \
                                  (int)(K), \
                                  (__v8di)(__m512i) \
                                    (_mm512_setzero_si512 ()), \
                                  (__mmask8)(Q)))
6586
/* Macro form: expands to __builtin_ia32_permdi512_mask with W as third
   operand and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i)(X), \
     (int)(I), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(M)))
6592#endif
6593
6594extern __inline __m512i
6595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6597{
583a9919
KY
6598 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6599 (__v8di) __X,
756c5857
AI
6600 (__v8di)
6601 _mm512_setzero_si512 (),
6602 __M);
6603}
6604
6605extern __inline __m512i
6606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6607_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6608{
583a9919
KY
6609 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6610 (__v8di) __X,
756c5857 6611 (__v8di)
4271e5cb 6612 _mm512_undefined_epi32 (),
756c5857
AI
6613 (__mmask8) -1);
6614}
6615
6616extern __inline __m512i
6617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6619 __m512i __Y)
6620{
583a9919
KY
6621 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6622 (__v8di) __X,
756c5857
AI
6623 (__v8di) __W,
6624 __M);
6625}
6626
6627extern __inline __m512i
6628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6629_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6630{
583a9919
KY
6631 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6632 (__v16si) __X,
756c5857
AI
6633 (__v16si)
6634 _mm512_setzero_si512 (),
6635 __M);
6636}
6637
6638extern __inline __m512i
6639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6640_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6641{
583a9919
KY
6642 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6643 (__v16si) __X,
756c5857 6644 (__v16si)
4271e5cb 6645 _mm512_undefined_epi32 (),
756c5857
AI
6646 (__mmask16) -1);
6647}
6648
6649extern __inline __m512i
6650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6651_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6652 __m512i __Y)
6653{
583a9919
KY
6654 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6655 (__v16si) __X,
756c5857
AI
6656 (__v16si) __W,
6657 __M);
6658}
6659
6660extern __inline __m512d
6661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6662_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6663{
6664 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6665 (__v8di) __X,
6666 (__v8df)
0b192937 6667 _mm512_undefined_pd (),
756c5857
AI
6668 (__mmask8) -1);
6669}
6670
6671extern __inline __m512d
6672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6673_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6674{
6675 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6676 (__v8di) __X,
6677 (__v8df) __W,
6678 (__mmask8) __U);
6679}
6680
6681extern __inline __m512d
6682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6683_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6684{
6685 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6686 (__v8di) __X,
6687 (__v8df)
6688 _mm512_setzero_pd (),
6689 (__mmask8) __U);
6690}
6691
6692extern __inline __m512
6693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6694_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6695{
6696 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6697 (__v16si) __X,
6698 (__v16sf)
0b192937 6699 _mm512_undefined_ps (),
756c5857
AI
6700 (__mmask16) -1);
6701}
6702
6703extern __inline __m512
6704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6706{
6707 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6708 (__v16si) __X,
6709 (__v16sf) __W,
6710 (__mmask16) __U);
6711}
6712
6713extern __inline __m512
6714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6716{
6717 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6718 (__v16si) __X,
6719 (__v16sf)
6720 _mm512_setzero_ps (),
6721 (__mmask16) __U);
6722}
6723
6724#ifdef __OPTIMIZE__
6725extern __inline __m512
6726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6728{
6729 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6730 (__v16sf) __V, __imm,
6731 (__v16sf)
0b192937 6732 _mm512_undefined_ps (),
756c5857
AI
6733 (__mmask16) -1);
6734}
6735
6736extern __inline __m512
6737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6738_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6739 __m512 __V, const int __imm)
6740{
6741 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6742 (__v16sf) __V, __imm,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6745}
6746
6747extern __inline __m512
6748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6750{
6751 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6752 (__v16sf) __V, __imm,
6753 (__v16sf)
6754 _mm512_setzero_ps (),
6755 (__mmask16) __U);
6756}
6757
6758extern __inline __m512d
6759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6760_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6761{
6762 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6763 (__v8df) __V, __imm,
6764 (__v8df)
0b192937 6765 _mm512_undefined_pd (),
756c5857
AI
6766 (__mmask8) -1);
6767}
6768
6769extern __inline __m512d
6770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6772 __m512d __V, const int __imm)
6773{
6774 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6775 (__v8df) __V, __imm,
6776 (__v8df) __W,
6777 (__mmask8) __U);
6778}
6779
6780extern __inline __m512d
6781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6783 const int __imm)
6784{
6785 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6786 (__v8df) __V, __imm,
6787 (__v8df)
6788 _mm512_setzero_pd (),
6789 (__mmask8) __U);
6790}
6791
6792extern __inline __m512d
6793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6795 const int __imm, const int __R)
6796{
6797 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6798 (__v8df) __B,
6799 (__v8di) __C,
6800 __imm,
6801 (__mmask8) -1, __R);
6802}
6803
6804extern __inline __m512d
6805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6807 __m512i __C, const int __imm, const int __R)
6808{
6809 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6810 (__v8df) __B,
6811 (__v8di) __C,
6812 __imm,
6813 (__mmask8) __U, __R);
6814}
6815
6816extern __inline __m512d
6817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6818_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6819 __m512i __C, const int __imm, const int __R)
6820{
6821 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6822 (__v8df) __B,
6823 (__v8di) __C,
6824 __imm,
6825 (__mmask8) __U, __R);
6826}
6827
6828extern __inline __m512
6829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6831 const int __imm, const int __R)
6832{
6833 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6834 (__v16sf) __B,
6835 (__v16si) __C,
6836 __imm,
6837 (__mmask16) -1, __R);
6838}
6839
6840extern __inline __m512
6841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6843 __m512i __C, const int __imm, const int __R)
6844{
6845 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6846 (__v16sf) __B,
6847 (__v16si) __C,
6848 __imm,
6849 (__mmask16) __U, __R);
6850}
6851
6852extern __inline __m512
6853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6854_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6855 __m512i __C, const int __imm, const int __R)
6856{
6857 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6858 (__v16sf) __B,
6859 (__v16si) __C,
6860 __imm,
6861 (__mmask16) __U, __R);
6862}
6863
6864extern __inline __m128d
6865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6866_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6867 const int __imm, const int __R)
6868{
6869 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6870 (__v2df) __B,
6871 (__v2di) __C, __imm,
6872 (__mmask8) -1, __R);
6873}
6874
6875extern __inline __m128d
6876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6877_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6878 __m128i __C, const int __imm, const int __R)
6879{
6880 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6881 (__v2df) __B,
6882 (__v2di) __C, __imm,
6883 (__mmask8) __U, __R);
6884}
6885
6886extern __inline __m128d
6887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6888_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6889 __m128i __C, const int __imm, const int __R)
6890{
6891 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6892 (__v2df) __B,
6893 (__v2di) __C,
6894 __imm,
6895 (__mmask8) __U, __R);
6896}
6897
6898extern __inline __m128
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6901 const int __imm, const int __R)
6902{
6903 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6904 (__v4sf) __B,
6905 (__v4si) __C, __imm,
6906 (__mmask8) -1, __R);
6907}
6908
6909extern __inline __m128
6910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6912 __m128i __C, const int __imm, const int __R)
6913{
6914 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6915 (__v4sf) __B,
6916 (__v4si) __C, __imm,
6917 (__mmask8) __U, __R);
6918}
6919
6920extern __inline __m128
6921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6923 __m128i __C, const int __imm, const int __R)
6924{
6925 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6926 (__v4sf) __B,
6927 (__v4si) __C, __imm,
6928 (__mmask8) __U, __R);
6929}
6930
6931#else
/* Macro form: expands to __builtin_ia32_shufpd512_mask with an undefined
   fourth operand and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1))
6937
/* Macro form: expands to __builtin_ia32_shufpd512_mask with W as fourth
   operand and U as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
6943
/* Macro form: expands to __builtin_ia32_shufpd512_mask with a zero
   vector as fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(C), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U)))
6949
/* Macro form: expands to __builtin_ia32_shufps512_mask with an undefined
   fourth operand and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1))
6955
/* Macro form: expands to __builtin_ia32_shufps512_mask with W as fourth
   operand and U as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))
6961
/* Macro form: expands to __builtin_ia32_shufps512_mask with a zero
   vector as fourth operand and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(C), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U)))
6967
/* Macro form: expands to __builtin_ia32_fixupimmpd512_mask with an
   all-ones mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), (R)))
6972
/* Macro form: expands to __builtin_ia32_fixupimmpd512_mask with U as the
   mask.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
6977
/* Macro form: expands to __builtin_ia32_fixupimmpd512_maskz with U as
   the mask.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
6982
/* Macro form: expands to __builtin_ia32_fixupimmps512_mask with an
   all-ones mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), (R)))
6987
/* Macro form: expands to __builtin_ia32_fixupimmps512_mask with U as the
   mask.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), (R)))
6992
/* Macro form: expands to __builtin_ia32_fixupimmps512_maskz with U as
   the mask.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), (R)))
6997
/* Macro form: expands to __builtin_ia32_fixupimmsd_mask with an all-ones
   mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), (R)))
7002
/* Macro form: expands to __builtin_ia32_fixupimmsd_mask with U as the
   mask.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
7007
/* Macro form: expands to __builtin_ia32_fixupimmsd_maskz with U as the
   mask.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
7012
/* Macro form: expands to __builtin_ia32_fixupimmss_mask with an all-ones
   mask.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), (R)))
7017
/* Macro form: expands to __builtin_ia32_fixupimmss_mask with U as the
   mask.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
7022
/* Macro form: expands to __builtin_ia32_fixupimmss_maskz with U as the
   mask.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), (R)))
7027#endif
7028
7029extern __inline __m512
7030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7031_mm512_movehdup_ps (__m512 __A)
7032{
7033 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7034 (__v16sf)
0b192937 7035 _mm512_undefined_ps (),
756c5857
AI
7036 (__mmask16) -1);
7037}
7038
7039extern __inline __m512
7040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7042{
7043 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7044 (__v16sf) __W,
7045 (__mmask16) __U);
7046}
7047
7048extern __inline __m512
7049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7051{
7052 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7053 (__v16sf)
7054 _mm512_setzero_ps (),
7055 (__mmask16) __U);
7056}
7057
7058extern __inline __m512
7059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7060_mm512_moveldup_ps (__m512 __A)
7061{
7062 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7063 (__v16sf)
0b192937 7064 _mm512_undefined_ps (),
756c5857
AI
7065 (__mmask16) -1);
7066}
7067
7068extern __inline __m512
7069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7071{
7072 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7073 (__v16sf) __W,
7074 (__mmask16) __U);
7075}
7076
7077extern __inline __m512
7078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7080{
7081 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7082 (__v16sf)
7083 _mm512_setzero_ps (),
7084 (__mmask16) __U);
7085}
7086
7087extern __inline __m512i
7088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089_mm512_or_si512 (__m512i __A, __m512i __B)
7090{
2069d6fc 7091 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7092}
7093
7094extern __inline __m512i
7095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096_mm512_or_epi32 (__m512i __A, __m512i __B)
7097{
2069d6fc 7098 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7099}
7100
7101extern __inline __m512i
7102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7103_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7104{
7105 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7106 (__v16si) __B,
7107 (__v16si) __W,
7108 (__mmask16) __U);
7109}
7110
7111extern __inline __m512i
7112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7114{
7115 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7116 (__v16si) __B,
7117 (__v16si)
7118 _mm512_setzero_si512 (),
7119 (__mmask16) __U);
7120}
7121
7122extern __inline __m512i
7123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124_mm512_or_epi64 (__m512i __A, __m512i __B)
7125{
2069d6fc 7126 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
7127}
7128
7129extern __inline __m512i
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7132{
7133 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7134 (__v8di) __B,
7135 (__v8di) __W,
7136 (__mmask8) __U);
7137}
7138
7139extern __inline __m512i
7140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7141_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7142{
7143 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7144 (__v8di) __B,
7145 (__v8di)
7146 _mm512_setzero_si512 (),
7147 (__mmask8) __U);
7148}
7149
7150extern __inline __m512i
7151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152_mm512_xor_si512 (__m512i __A, __m512i __B)
7153{
2069d6fc 7154 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7155}
7156
7157extern __inline __m512i
7158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7159_mm512_xor_epi32 (__m512i __A, __m512i __B)
7160{
2069d6fc 7161 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7162}
7163
7164extern __inline __m512i
7165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7167{
7168 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7169 (__v16si) __B,
7170 (__v16si) __W,
7171 (__mmask16) __U);
7172}
7173
7174extern __inline __m512i
7175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7177{
7178 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7179 (__v16si) __B,
7180 (__v16si)
7181 _mm512_setzero_si512 (),
7182 (__mmask16) __U);
7183}
7184
7185extern __inline __m512i
7186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187_mm512_xor_epi64 (__m512i __A, __m512i __B)
7188{
2069d6fc 7189 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
7190}
7191
7192extern __inline __m512i
7193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7195{
7196 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7197 (__v8di) __B,
7198 (__v8di) __W,
7199 (__mmask8) __U);
7200}
7201
7202extern __inline __m512i
7203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
7205{
7206 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7207 (__v8di) __B,
7208 (__v8di)
7209 _mm512_setzero_si512 (),
7210 (__mmask8) __U);
7211}
7212
7213#ifdef __OPTIMIZE__
7214extern __inline __m512i
7215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216_mm512_rol_epi32 (__m512i __A, const int __B)
7217{
7218 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7219 (__v16si)
4271e5cb 7220 _mm512_undefined_epi32 (),
756c5857
AI
7221 (__mmask16) -1);
7222}
7223
7224extern __inline __m512i
7225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7226_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7227{
7228 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7229 (__v16si) __W,
7230 (__mmask16) __U);
7231}
7232
7233extern __inline __m512i
7234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7236{
7237 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7238 (__v16si)
7239 _mm512_setzero_si512 (),
7240 (__mmask16) __U);
7241}
7242
7243extern __inline __m512i
7244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7245_mm512_ror_epi32 (__m512i __A, int __B)
7246{
7247 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7248 (__v16si)
4271e5cb 7249 _mm512_undefined_epi32 (),
756c5857
AI
7250 (__mmask16) -1);
7251}
7252
7253extern __inline __m512i
7254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7255_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7256{
7257 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7258 (__v16si) __W,
7259 (__mmask16) __U);
7260}
7261
7262extern __inline __m512i
7263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7264_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7265{
7266 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7267 (__v16si)
7268 _mm512_setzero_si512 (),
7269 (__mmask16) __U);
7270}
7271
7272extern __inline __m512i
7273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274_mm512_rol_epi64 (__m512i __A, const int __B)
7275{
7276 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7277 (__v8di)
4271e5cb 7278 _mm512_undefined_epi32 (),
756c5857
AI
7279 (__mmask8) -1);
7280}
7281
7282extern __inline __m512i
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7285{
7286 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7287 (__v8di) __W,
7288 (__mmask8) __U);
7289}
7290
7291extern __inline __m512i
7292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7294{
7295 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7296 (__v8di)
7297 _mm512_setzero_si512 (),
7298 (__mmask8) __U);
7299}
7300
7301extern __inline __m512i
7302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303_mm512_ror_epi64 (__m512i __A, int __B)
7304{
7305 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7306 (__v8di)
4271e5cb 7307 _mm512_undefined_epi32 (),
756c5857
AI
7308 (__mmask8) -1);
7309}
7310
7311extern __inline __m512i
7312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7314{
7315 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7316 (__v8di) __W,
7317 (__mmask8) __U);
7318}
7319
7320extern __inline __m512i
7321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7323{
7324 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7325 (__v8di)
7326 _mm512_setzero_si512 (),
7327 (__mmask8) __U);
7328}
7329
7330#else
/* Macro form: expands to __builtin_ia32_prold512_mask with an undefined
   third operand and an all-ones mask.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)_mm512_undefined_epi32 (), \
     (__mmask16)(-1)))
/* Macro form: expands to __builtin_ia32_prold512_mask with W as third
   operand and U as the mask.  */
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
/* Macro form: expands to __builtin_ia32_prold512_mask with a zero vector
   as third operand and U as the mask.  */
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro form: expands to __builtin_ia32_prord512_mask with an undefined
   third operand and an all-ones mask.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)_mm512_undefined_epi32 (), \
     (__mmask16)(-1)))
/* Macro form: expands to __builtin_ia32_prord512_mask with W as third
   operand and U as the mask.  */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))
/* Macro form: expands to __builtin_ia32_prord512_mask with a zero vector
   as third operand and U as the mask.  */
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ( \
     (__v16si)(__m512i)(A), \
     (int)(B), \
     (__v16si)_mm512_setzero_si512 (), \
     (__mmask16)(U)))
/* Macro form: expands to __builtin_ia32_prolq512_mask with an undefined
   third operand and an all-ones mask.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)_mm512_undefined_epi32 (), \
     (__mmask8)(-1)))
/* Macro form: expands to __builtin_ia32_prolq512_mask with W as third
   operand and U as the mask.  */
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
/* Macro form: expands to __builtin_ia32_prolq512_mask with a zero vector
   as third operand and U as the mask.  */
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7376
/* Macro form: expands to __builtin_ia32_prorq512_mask with an undefined
   third operand and an all-ones mask.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)_mm512_undefined_epi32 (), \
     (__mmask8)(-1)))
/* Macro form: expands to __builtin_ia32_prorq512_mask with W as third
   operand and U as the mask.  */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)(__m512i)(W), \
     (__mmask8)(U)))
/* Macro form: expands to __builtin_ia32_prorq512_mask with a zero vector
   as third operand and U as the mask.  */
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ( \
     (__v8di)(__m512i)(A), \
     (int)(B), \
     (__v8di)_mm512_setzero_si512 (), \
     (__mmask8)(U)))
7392#endif
7393
7394extern __inline __m512i
7395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7396_mm512_and_si512 (__m512i __A, __m512i __B)
7397{
2069d6fc 7398 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7399}
7400
7401extern __inline __m512i
7402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7403_mm512_and_epi32 (__m512i __A, __m512i __B)
7404{
2069d6fc 7405 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7406}
7407
7408extern __inline __m512i
7409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7410_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7411{
7412 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7413 (__v16si) __B,
7414 (__v16si) __W,
7415 (__mmask16) __U);
7416}
7417
7418extern __inline __m512i
7419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7421{
7422 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7423 (__v16si) __B,
7424 (__v16si)
7425 _mm512_setzero_si512 (),
7426 (__mmask16) __U);
7427}
7428
7429extern __inline __m512i
7430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431_mm512_and_epi64 (__m512i __A, __m512i __B)
7432{
2069d6fc 7433 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7434}
7435
7436extern __inline __m512i
7437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7438_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7439{
7440 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7441 (__v8di) __B,
7442 (__v8di) __W, __U);
7443}
7444
7445extern __inline __m512i
7446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7447_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7448{
7449 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7450 (__v8di) __B,
7451 (__v8di)
7452 _mm512_setzero_pd (),
7453 __U);
7454}
7455
7456extern __inline __m512i
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm512_andnot_si512 (__m512i __A, __m512i __B)
7459{
7460 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7461 (__v16si) __B,
7462 (__v16si)
4271e5cb 7463 _mm512_undefined_epi32 (),
756c5857
AI
7464 (__mmask16) -1);
7465}
7466
7467extern __inline __m512i
7468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7470{
7471 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7472 (__v16si) __B,
7473 (__v16si)
4271e5cb 7474 _mm512_undefined_epi32 (),
756c5857
AI
7475 (__mmask16) -1);
7476}
7477
7478extern __inline __m512i
7479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7481{
7482 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7483 (__v16si) __B,
7484 (__v16si) __W,
7485 (__mmask16) __U);
7486}
7487
7488extern __inline __m512i
7489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7491{
7492 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7493 (__v16si) __B,
7494 (__v16si)
7495 _mm512_setzero_si512 (),
7496 (__mmask16) __U);
7497}
7498
7499extern __inline __m512i
7500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7502{
7503 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7504 (__v8di) __B,
7505 (__v8di)
4271e5cb 7506 _mm512_undefined_epi32 (),
756c5857
AI
7507 (__mmask8) -1);
7508}
7509
7510extern __inline __m512i
7511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7512_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7513{
7514 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7515 (__v8di) __B,
7516 (__v8di) __W, __U);
7517}
7518
7519extern __inline __m512i
7520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7521_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7522{
7523 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7524 (__v8di) __B,
7525 (__v8di)
7526 _mm512_setzero_pd (),
7527 __U);
7528}
7529
7530extern __inline __mmask16
7531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7533{
7534 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7535 (__v16si) __B,
7536 (__mmask16) -1);
7537}
7538
7539extern __inline __mmask16
7540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7542{
7543 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7544 (__v16si) __B, __U);
7545}
7546
7547extern __inline __mmask8
7548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7549_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7550{
7551 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7552 (__v8di) __B,
7553 (__mmask8) -1);
7554}
7555
7556extern __inline __mmask8
7557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7558_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7559{
7560 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7561}
7562
260d3642
IT
7563extern __inline __mmask16
7564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7566{
7567 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7568 (__v16si) __B,
7569 (__mmask16) -1);
7570}
7571
7572extern __inline __mmask16
7573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7574_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7575{
7576 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7577 (__v16si) __B, __U);
7578}
7579
7580extern __inline __mmask8
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7583{
7584 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7585 (__v8di) __B,
7586 (__mmask8) -1);
7587}
7588
7589extern __inline __mmask8
7590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7591_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7592{
7593 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7594 (__v8di) __B, __U);
7595}
7596
dcb2c527
JJ
7597extern __inline __m512
7598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599_mm512_abs_ps (__m512 __A)
7600{
7601 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7602 _mm512_set1_epi32 (0x7fffffff));
7603}
7604
7605extern __inline __m512
7606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7608{
7609 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7610 _mm512_set1_epi32 (0x7fffffff));
7611}
7612
7613extern __inline __m512d
7614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615_mm512_abs_pd (__m512 __A)
7616{
7617 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7618 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7619}
7620
7621extern __inline __m512d
7622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A)
7624{
7625 return (__m512d)
7626 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7627 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7628}
7629
756c5857
AI
7630extern __inline __m512i
7631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7632_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7633{
7634 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7635 (__v16si) __B,
7636 (__v16si)
4271e5cb 7637 _mm512_undefined_epi32 (),
756c5857
AI
7638 (__mmask16) -1);
7639}
7640
7641extern __inline __m512i
7642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7643_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7644 __m512i __B)
7645{
7646 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7647 (__v16si) __B,
7648 (__v16si) __W,
7649 (__mmask16) __U);
7650}
7651
7652extern __inline __m512i
7653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7654_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7655{
7656 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7657 (__v16si) __B,
7658 (__v16si)
7659 _mm512_setzero_si512 (),
7660 (__mmask16) __U);
7661}
7662
7663extern __inline __m512i
7664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7666{
7667 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7668 (__v8di) __B,
7669 (__v8di)
4271e5cb 7670 _mm512_undefined_epi32 (),
756c5857
AI
7671 (__mmask8) -1);
7672}
7673
7674extern __inline __m512i
7675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7677{
7678 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7679 (__v8di) __B,
7680 (__v8di) __W,
7681 (__mmask8) __U);
7682}
7683
7684extern __inline __m512i
7685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7687{
7688 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7689 (__v8di) __B,
7690 (__v8di)
7691 _mm512_setzero_si512 (),
7692 (__mmask8) __U);
7693}
7694
7695extern __inline __m512i
7696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7697_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7698{
7699 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7700 (__v16si) __B,
7701 (__v16si)
4271e5cb 7702 _mm512_undefined_epi32 (),
756c5857
AI
7703 (__mmask16) -1);
7704}
7705
7706extern __inline __m512i
7707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7709 __m512i __B)
7710{
7711 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7712 (__v16si) __B,
7713 (__v16si) __W,
7714 (__mmask16) __U);
7715}
7716
7717extern __inline __m512i
7718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7720{
7721 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7722 (__v16si) __B,
7723 (__v16si)
7724 _mm512_setzero_si512 (),
7725 (__mmask16) __U);
7726}
7727
7728extern __inline __m512i
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7731{
7732 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7733 (__v8di) __B,
7734 (__v8di)
4271e5cb 7735 _mm512_undefined_epi32 (),
756c5857
AI
7736 (__mmask8) -1);
7737}
7738
7739extern __inline __m512i
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7742{
7743 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7744 (__v8di) __B,
7745 (__v8di) __W,
7746 (__mmask8) __U);
7747}
7748
7749extern __inline __m512i
7750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7752{
7753 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7754 (__v8di) __B,
7755 (__v8di)
7756 _mm512_setzero_si512 (),
7757 (__mmask8) __U);
7758}
7759
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar float to 64-bit integer conversions taking an explicit __R
   argument that is passed straight to the builtin.  The _i64 and _si64
   spellings call the same builtin.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 ((A), (B)))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 ((A), (B)))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))
#endif
#endif
7823
#ifdef __OPTIMIZE__
/* Scalar float to 32-bit integer conversions taking an explicit __R
   argument that is passed straight to the builtin.  The _i32 and _si32
   spellings call the same builtin.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (unsigned) __builtin_ia32_vcvtss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (unsigned) __builtin_ia32_vcvttss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;
  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
#endif
7885
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar double to 64-bit integer conversions taking an explicit __R
   argument that is passed straight to the builtin.  The _i64 and _si64
   spellings call the same builtin.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
#endif
#endif
7949
#ifdef __OPTIMIZE__
/* Scalar double to 32-bit integer conversions taking an explicit __R
   argument that is passed straight to the builtin.  The _i32 and _si32
   spellings call the same builtin.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (unsigned) __builtin_ia32_vcvtsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (unsigned) __builtin_ia32_vcvttsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;
  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
#endif
8011
8012extern __inline __m512d
8013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014_mm512_movedup_pd (__m512d __A)
8015{
8016 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8017 (__v8df)
0b192937 8018 _mm512_undefined_pd (),
756c5857
AI
8019 (__mmask8) -1);
8020}
8021
8022extern __inline __m512d
8023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8025{
8026 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8027 (__v8df) __W,
8028 (__mmask8) __U);
8029}
8030
8031extern __inline __m512d
8032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8033_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8034{
8035 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8036 (__v8df)
8037 _mm512_setzero_pd (),
8038 (__mmask8) __U);
8039}
8040
8041extern __inline __m512d
8042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8043_mm512_unpacklo_pd (__m512d __A, __m512d __B)
8044{
8045 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8046 (__v8df) __B,
8047 (__v8df)
0b192937 8048 _mm512_undefined_pd (),
756c5857
AI
8049 (__mmask8) -1);
8050}
8051
8052extern __inline __m512d
8053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8055{
8056 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8057 (__v8df) __B,
8058 (__v8df) __W,
8059 (__mmask8) __U);
8060}
8061
8062extern __inline __m512d
8063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8065{
8066 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8067 (__v8df) __B,
8068 (__v8df)
8069 _mm512_setzero_pd (),
8070 (__mmask8) __U);
8071}
8072
8073extern __inline __m512d
8074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075_mm512_unpackhi_pd (__m512d __A, __m512d __B)
8076{
8077 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8078 (__v8df) __B,
8079 (__v8df)
0b192937 8080 _mm512_undefined_pd (),
756c5857
AI
8081 (__mmask8) -1);
8082}
8083
8084extern __inline __m512d
8085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8086_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8087{
8088 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8089 (__v8df) __B,
8090 (__v8df) __W,
8091 (__mmask8) __U);
8092}
8093
8094extern __inline __m512d
8095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8097{
8098 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8099 (__v8df) __B,
8100 (__v8df)
8101 _mm512_setzero_pd (),
8102 (__mmask8) __U);
8103}
8104
8105extern __inline __m512
8106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8107_mm512_unpackhi_ps (__m512 __A, __m512 __B)
8108{
8109 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8110 (__v16sf) __B,
8111 (__v16sf)
0b192937 8112 _mm512_undefined_ps (),
756c5857
AI
8113 (__mmask16) -1);
8114}
8115
8116extern __inline __m512
8117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8119{
8120 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8121 (__v16sf) __B,
8122 (__v16sf) __W,
8123 (__mmask16) __U);
8124}
8125
8126extern __inline __m512
8127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8129{
8130 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8131 (__v16sf) __B,
8132 (__v16sf)
8133 _mm512_setzero_ps (),
8134 (__mmask16) __U);
8135}
8136
#ifdef __OPTIMIZE__
/* Inline-function forms of the ps<->pd and ps<->ph conversions that
   take an extra immediate-style argument (__R or __I).  They are only
   provided under __OPTIMIZE__; NOTE(review): presumably because that
   argument must reach the builtin as a compile-time constant, which
   needs the wrapper to be inlined.  The macro forms in the #else branch
   are used otherwise.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
  /* Unmasked: all-ones mask, undefined second operand.  */
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
                            const int __R)
{
  /* __W is the builtin's second operand, __U its mask.  */
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
{
  /* All-zero second operand, __U as mask.  */
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
  /* Unmasked: all-ones mask, undefined second operand.  */
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
                            const int __R)
{
  /* __W is the builtin's second operand, __U its mask.  */
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
{
  /* All-zero second operand, __U as mask.  */
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U, __R);
}

/* The _cvt_roundps_ph and _cvtps_ph spellings below have identical
   bodies.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_undefined_si256 (),
                                                     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_undefined_si256 (),
                                                     -1);
}

/* Note the parameter naming in the masked ps->ph forms: __U is the
   __m256i vector operand and __W is the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
                            const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi) __U,
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi) __U,
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask16) __W);
}
#else
/* Macro forms.  Every expansion is wrapped in parentheses and every
   macro argument is parenthesised, so the result cast binds to the whole
   call and a cast applied to an argument covers the whole argument
   expression.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
     (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
     (__v16sf) _mm512_setzero_ps (), (U), (B)))

#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
#endif
8300
#ifdef __OPTIMIZE__
/* Inline-function forms of the pd->ps, sd->ss and ss->sd conversions
   that take an explicit __R argument, which is passed straight to the
   builtin.  The macro forms in the #else branch are used when
   __OPTIMIZE__ is not defined.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
  /* Unmasked: all-ones mask, undefined second operand.  */
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf)
                                                   _mm256_undefined_ps (),
                                                   (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
                            const int __R)
{
  /* __W is the builtin's second operand, __U its mask.  */
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
{
  /* All-zero second operand, __U as mask.  */
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf)
                                                   _mm256_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
                                                 (__v2df) __B,
                                                 __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
                                                  (__v4sf) __B,
                                                  __R);
}
#else
/* Macro forms.  Every expansion is wrapped in parentheses and every
   macro argument is parenthesised, so the result cast binds to the whole
   call when the macro is used inside a larger expression.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
     (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
#endif
8365
8366extern __inline void
8367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8368_mm512_stream_si512 (__m512i * __P, __m512i __A)
8369{
8370 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8371}
8372
8373extern __inline void
8374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8375_mm512_stream_ps (float *__P, __m512 __A)
8376{
8377 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8378}
8379
8380extern __inline void
8381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382_mm512_stream_pd (double *__P, __m512d __A)
8383{
8384 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8385}
8386
c56a42b9
KY
8387extern __inline __m512i
8388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8389_mm512_stream_load_si512 (void *__P)
8390{
8391 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8392}
8393
9c3c2608
UB
/* Constants for mantissa extraction: normalization interval selectors.
   The numeric values are spelled out; they are the values the
   enumerators receive implicitly from their declaration order.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,      /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,     /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,     /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8402
/* Sign-control selectors for mantissa extraction.  The numeric values
   are spelled out; they are the values the enumerators receive
   implicitly from their declaration order.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,    /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,   /* sign = 0 */
  _MM_MANT_SIGN_nan = 2     /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8409
756c5857 8410#ifdef __OPTIMIZE__
075691af
AI
8411extern __inline __m128
8412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8413_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8414{
8415 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8416 (__v4sf) __B,
8417 __R);
8418}
8419
8420extern __inline __m128d
8421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8423{
8424 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8425 (__v2df) __B,
8426 __R);
8427}
8428
756c5857
AI
8429extern __inline __m512
8430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8431_mm512_getexp_round_ps (__m512 __A, const int __R)
8432{
8433 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8434 (__v16sf)
0b192937 8435 _mm512_undefined_ps (),
756c5857
AI
8436 (__mmask16) -1, __R);
8437}
8438
8439extern __inline __m512
8440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8441_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8442 const int __R)
8443{
8444 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8445 (__v16sf) __W,
8446 (__mmask16) __U, __R);
8447}
8448
8449extern __inline __m512
8450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8451_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8452{
8453 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8454 (__v16sf)
8455 _mm512_setzero_ps (),
8456 (__mmask16) __U, __R);
8457}
8458
8459extern __inline __m512d
8460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8461_mm512_getexp_round_pd (__m512d __A, const int __R)
8462{
8463 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8464 (__v8df)
0b192937 8465 _mm512_undefined_pd (),
756c5857
AI
8466 (__mmask8) -1, __R);
8467}
8468
8469extern __inline __m512d
8470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8471_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8472 const int __R)
8473{
8474 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8475 (__v8df) __W,
8476 (__mmask8) __U, __R);
8477}
8478
8479extern __inline __m512d
8480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8481_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8482{
8483 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8484 (__v8df)
8485 _mm512_setzero_pd (),
8486 (__mmask8) __U, __R);
8487}
8488
756c5857
AI
8489extern __inline __m512d
8490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8492 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8493{
8494 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8495 (__C << 2) | __B,
0b192937 8496 _mm512_undefined_pd (),
756c5857
AI
8497 (__mmask8) -1, __R);
8498}
8499
8500extern __inline __m512d
8501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8503 _MM_MANTISSA_NORM_ENUM __B,
8504 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8505{
8506 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8507 (__C << 2) | __B,
8508 (__v8df) __W, __U,
8509 __R);
8510}
8511
8512extern __inline __m512d
8513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8514_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8515 _MM_MANTISSA_NORM_ENUM __B,
8516 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8517{
8518 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8519 (__C << 2) | __B,
8520 (__v8df)
8521 _mm512_setzero_pd (),
8522 __U, __R);
8523}
8524
8525extern __inline __m512
8526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8528 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8529{
8530 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8531 (__C << 2) | __B,
0b192937 8532 _mm512_undefined_ps (),
756c5857
AI
8533 (__mmask16) -1, __R);
8534}
8535
8536extern __inline __m512
8537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8538_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8539 _MM_MANTISSA_NORM_ENUM __B,
8540 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8541{
8542 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8543 (__C << 2) | __B,
8544 (__v16sf) __W, __U,
8545 __R);
8546}
8547
8548extern __inline __m512
8549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8550_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8551 _MM_MANTISSA_NORM_ENUM __B,
8552 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8553{
8554 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8555 (__C << 2) | __B,
8556 (__v16sf)
8557 _mm512_setzero_ps (),
8558 __U, __R);
8559}
8560
075691af
AI
8561extern __inline __m128d
8562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8563_mm_getmant_round_sd (__m128d __A, __m128d __B,
8564 _MM_MANTISSA_NORM_ENUM __C,
8565 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8566{
8567 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8568 (__v2df) __B,
8569 (__D << 2) | __C,
8570 __R);
8571}
8572
8573extern __inline __m128
8574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8575_mm_getmant_round_ss (__m128 __A, __m128 __B,
8576 _MM_MANTISSA_NORM_ENUM __C,
8577 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8578{
8579 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8580 (__v4sf) __B,
8581 (__D << 2) | __C,
8582 __R);
8583}
8584
756c5857
AI
8585#else
/* Macro forms of the packed double-precision getmant intrinsics.  Each
   expands to one call of the getmantpd512 builtin with the immediate
   ((C) << 2) | (B); they differ only in the third operand (undefined,
   W, or all zeros) and in the mask (all ones or U).  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1, \
     (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U), \
     (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (((C) << 2) | (B)), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U), \
     (R)))
/* Macro forms of the packed single-precision getmant intrinsics.  Each
   expands to one call of the getmantps512 builtin with the immediate
   ((C) << 2) | (B); they differ only in the third operand (undefined,
   W, or all zeros) and in the mask (all ones or U).  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1, \
     (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U), \
     (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf) (__m512) (X), \
     (int) (((C) << 2) | (B)), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U), \
     (R)))
075691af
AI
/* Macro forms of the scalar getmant intrinsics: X and Y are forwarded
   to the builtin together with the immediate ((D) << 2) | (C) and R.  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (((D) << 2) | (C)), \
     (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (((D) << 2) | (C)), \
     (R)))
8637
/* Macro forms of the scalar getexp intrinsics: A and B are forwarded to
   the builtin followed by R.  R is parenthesized like the other macro
   arguments so that an arbitrary argument expression cannot change how
   the call's argument list is parsed.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), \
                                              (R)))

#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), \
                                               (R)))
8643
756c5857
AI
/* Macro forms of the packed single-precision getexp intrinsics.  Each
   expands to one call of the getexpps512 builtin; they differ only in
   the second operand (undefined, W, or all zeros) and in the mask (all
   ones or U).  R is parenthesized like every other macro argument.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) (__m512) (W), \
                                             (__mmask16) (U), (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (U), (R)))
8655
/* Macro forms of the packed double-precision getexp intrinsics.  Each
   expands to one call of the getexppd512 builtin; they differ only in
   the second operand (undefined, W, or all zeros) and in the mask (all
   ones or U).  R is parenthesized like every other macro argument.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) (__m512d) (W), \
                                              (__mmask8) (U), (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8) (U), (R)))
8667#endif
8668
8669#ifdef __OPTIMIZE__
8670extern __inline __m512
8671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8673{
8674 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
8675 (__v16sf)
8676 _mm512_undefined_ps (),
8677 -1, __R);
756c5857
AI
8678}
8679
8680extern __inline __m512
8681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8682_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8683 const int __imm, const int __R)
8684{
8685 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8686 (__v16sf) __A,
8687 (__mmask16) __B, __R);
8688}
8689
8690extern __inline __m512
8691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8693 const int __imm, const int __R)
8694{
8695 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8696 __imm,
8697 (__v16sf)
8698 _mm512_setzero_ps (),
8699 (__mmask16) __A, __R);
8700}
8701
8702extern __inline __m512d
8703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8705{
8706 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
8707 (__v8df)
8708 _mm512_undefined_pd (),
8709 -1, __R);
756c5857
AI
8710}
8711
8712extern __inline __m512d
8713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8715 __m512d __C, const int __imm, const int __R)
8716{
8717 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8718 (__v8df) __A,
8719 (__mmask8) __B, __R);
8720}
8721
8722extern __inline __m512d
8723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8724_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8725 const int __imm, const int __R)
8726{
8727 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8728 __imm,
8729 (__v8df)
8730 _mm512_setzero_pd (),
8731 (__mmask8) __A, __R);
8732}
075691af
AI
8733
8734extern __inline __m128
8735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8737{
8738 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8739 (__v4sf) __B, __imm, __R);
8740}
8741
8742extern __inline __m128d
8743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8744_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8745 const int __R)
8746{
8747 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8748 (__v2df) __B, __imm, __R);
8749}
8750
756c5857
AI
8751#else
/* Macro forms of the packed single-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  Each expands to one call of the
   rndscaleps builtin.  In the mask form A is the merge operand, B the
   mask, C the input and D the immediate; in the maskz form A is the
   mask, B the input and C the immediate.  R is parenthesized like every
   other macro argument.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
                                            (int) (B), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16) (-1), (R)))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), \
                                            (int) (D), \
                                            (__v16sf) (__m512) (A), \
                                            (__mmask16) (B), (R)))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), \
                                            (int) (C), \
                                            (__v16sf) _mm512_setzero_ps (), \
                                            (__mmask16) (A), (R)))
/* Macro forms of the packed double-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  Each expands to one call of the
   rndscalepd builtin.  In the mask form A is the merge operand, B the
   mask, C the input and D the immediate; in the maskz form A is the
   mask, B the input and C the immediate.  R is parenthesized like every
   other macro argument.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
                                             (int) (B), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8) (-1), (R)))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), \
                                             (int) (D), \
                                             (__v8df) (__m512d) (A), \
                                             (__mmask8) (B), (R)))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), \
                                             (int) (C), \
                                             (__v8df) _mm512_setzero_pd (), \
                                             (__mmask8) (A), (R)))
075691af
AI
/* Macro forms of the scalar roundscale intrinsics, used when
   __OPTIMIZE__ is not defined: A, B, the immediate C and R are forwarded
   in that order.  R is parenthesized like every other macro argument.  */
#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (int) (C), (R)))
#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (int) (C), (R)))
756c5857
AI
8784#endif
8785
8786extern __inline __m512
8787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788_mm512_floor_ps (__m512 __A)
8789{
8790 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8791 _MM_FROUND_FLOOR,
8792 (__v16sf) __A, -1,
8793 _MM_FROUND_CUR_DIRECTION);
8794}
8795
8796extern __inline __m512d
8797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798_mm512_floor_pd (__m512d __A)
8799{
8800 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8801 _MM_FROUND_FLOOR,
8802 (__v8df) __A, -1,
8803 _MM_FROUND_CUR_DIRECTION);
8804}
8805
8806extern __inline __m512
8807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8808_mm512_ceil_ps (__m512 __A)
8809{
8810 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8811 _MM_FROUND_CEIL,
8812 (__v16sf) __A, -1,
8813 _MM_FROUND_CUR_DIRECTION);
8814}
8815
8816extern __inline __m512d
8817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8818_mm512_ceil_pd (__m512d __A)
8819{
8820 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8821 _MM_FROUND_CEIL,
8822 (__v8df) __A, -1,
8823 _MM_FROUND_CUR_DIRECTION);
8824}
8825
8826extern __inline __m512
8827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8829{
8830 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8831 _MM_FROUND_FLOOR,
8832 (__v16sf) __W, __U,
8833 _MM_FROUND_CUR_DIRECTION);
8834}
8835
8836extern __inline __m512d
8837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8839{
8840 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8841 _MM_FROUND_FLOOR,
8842 (__v8df) __W, __U,
8843 _MM_FROUND_CUR_DIRECTION);
8844}
8845
8846extern __inline __m512
8847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8849{
8850 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8851 _MM_FROUND_CEIL,
8852 (__v16sf) __W, __U,
8853 _MM_FROUND_CUR_DIRECTION);
8854}
8855
8856extern __inline __m512d
8857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8858_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8859{
8860 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8861 _MM_FROUND_CEIL,
8862 (__v8df) __W, __U,
8863 _MM_FROUND_CUR_DIRECTION);
8864}
8865
756c5857 8866#ifdef __OPTIMIZE__
756c5857
AI
8867extern __inline __m512i
8868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8869_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8870{
8871 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8872 (__v16si) __B, __imm,
8873 (__v16si)
4271e5cb 8874 _mm512_undefined_epi32 (),
756c5857
AI
8875 (__mmask16) -1);
8876}
8877
8878extern __inline __m512i
8879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8880_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8881 __m512i __B, const int __imm)
8882{
8883 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8884 (__v16si) __B, __imm,
8885 (__v16si) __W,
8886 (__mmask16) __U);
8887}
8888
8889extern __inline __m512i
8890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8891_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8892 const int __imm)
8893{
8894 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8895 (__v16si) __B, __imm,
8896 (__v16si)
8897 _mm512_setzero_si512 (),
8898 (__mmask16) __U);
8899}
8900
8901extern __inline __m512i
8902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8903_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8904{
8905 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8906 (__v8di) __B, __imm,
8907 (__v8di)
4271e5cb 8908 _mm512_undefined_epi32 (),
756c5857
AI
8909 (__mmask8) -1);
8910}
8911
8912extern __inline __m512i
8913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8914_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8915 __m512i __B, const int __imm)
8916{
8917 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8918 (__v8di) __B, __imm,
8919 (__v8di) __W,
8920 (__mmask8) __U);
8921}
8922
8923extern __inline __m512i
8924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8925_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8926 const int __imm)
8927{
8928 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8929 (__v8di) __B, __imm,
8930 (__v8di)
8931 _mm512_setzero_si512 (),
8932 (__mmask8) __U);
8933}
8934#else
756c5857
AI
/* Macro forms of the 32-bit alignr intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to one call of the alignd512 builtin; they
   differ only in the fourth operand (undefined, W, or all zeros) and in
   the mask (all ones or U).  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
8949
/* Macro forms of the 64-bit alignr intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to one call of the alignq512 builtin; they
   differ only in the fourth operand (undefined, W, or all zeros) and in
   the mask (all ones or U).  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
8963#endif
8964
8965extern __inline __mmask16
8966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8967_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8968{
8969 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8970 (__v16si) __B,
8971 (__mmask16) -1);
8972}
8973
8974extern __inline __mmask16
8975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8976_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8977{
8978 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8979 (__v16si) __B, __U);
8980}
8981
8982extern __inline __mmask8
8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8985{
8986 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8987 (__v8di) __B, __U);
8988}
8989
8990extern __inline __mmask8
8991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8992_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8993{
8994 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8995 (__v8di) __B,
8996 (__mmask8) -1);
8997}
8998
8999extern __inline __mmask16
9000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9001_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9002{
9003 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9004 (__v16si) __B,
9005 (__mmask16) -1);
9006}
9007
9008extern __inline __mmask16
9009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9010_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9011{
9012 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9013 (__v16si) __B, __U);
9014}
9015
9016extern __inline __mmask8
9017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9018_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9019{
9020 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9021 (__v8di) __B, __U);
9022}
9023
9024extern __inline __mmask8
9025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9027{
9028 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9029 (__v8di) __B,
9030 (__mmask8) -1);
9031}
9032
d256b866
IT
9033extern __inline __mmask16
9034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9035_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9036{
9037 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9038 (__v16si) __Y, 5,
9039 (__mmask16) -1);
9040}
9041
275be1da
IT
9042extern __inline __mmask16
9043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9045{
9046 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9047 (__v16si) __Y, 5,
9048 (__mmask16) __M);
9049}
9050
9051extern __inline __mmask16
9052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9053_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9054{
9055 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9056 (__v16si) __Y, 5,
9057 (__mmask16) __M);
9058}
9059
d256b866
IT
9060extern __inline __mmask16
9061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9062_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9063{
9064 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9065 (__v16si) __Y, 5,
9066 (__mmask16) -1);
9067}
9068
275be1da
IT
9069extern __inline __mmask8
9070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9071_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9072{
9073 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9074 (__v8di) __Y, 5,
9075 (__mmask8) __M);
9076}
9077
d256b866
IT
9078extern __inline __mmask8
9079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9081{
9082 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9083 (__v8di) __Y, 5,
9084 (__mmask8) -1);
9085}
9086
275be1da
IT
9087extern __inline __mmask8
9088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9089_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9090{
9091 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9092 (__v8di) __Y, 5,
9093 (__mmask8) __M);
9094}
9095
d256b866
IT
9096extern __inline __mmask8
9097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9099{
9100 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9101 (__v8di) __Y, 5,
9102 (__mmask8) -1);
9103}
9104
275be1da
IT
9105extern __inline __mmask16
9106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9107_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9108{
9109 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9110 (__v16si) __Y, 2,
9111 (__mmask16) __M);
9112}
9113
d256b866
IT
9114extern __inline __mmask16
9115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9116_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9117{
9118 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9119 (__v16si) __Y, 2,
9120 (__mmask16) -1);
9121}
9122
275be1da
IT
9123extern __inline __mmask16
9124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9125_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9126{
9127 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9128 (__v16si) __Y, 2,
9129 (__mmask16) __M);
9130}
9131
d256b866
IT
9132extern __inline __mmask16
9133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9134_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9135{
9136 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9137 (__v16si) __Y, 2,
9138 (__mmask16) -1);
9139}
9140
275be1da
IT
9141extern __inline __mmask8
9142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9143_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9144{
9145 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9146 (__v8di) __Y, 2,
9147 (__mmask8) __M);
9148}
9149
d256b866
IT
9150extern __inline __mmask8
9151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9152_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9153{
9154 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9155 (__v8di) __Y, 2,
9156 (__mmask8) -1);
9157}
9158
275be1da
IT
9159extern __inline __mmask8
9160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9162{
9163 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9164 (__v8di) __Y, 2,
9165 (__mmask8) __M);
9166}
9167
d256b866
IT
9168extern __inline __mmask8
9169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9170_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9171{
9172 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9173 (__v8di) __Y, 2,
9174 (__mmask8) -1);
9175}
9176
275be1da
IT
9177extern __inline __mmask16
9178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9180{
9181 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9182 (__v16si) __Y, 1,
9183 (__mmask16) __M);
9184}
9185
d256b866
IT
9186extern __inline __mmask16
9187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9188_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9189{
9190 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9191 (__v16si) __Y, 1,
9192 (__mmask16) -1);
9193}
9194
275be1da
IT
9195extern __inline __mmask16
9196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9197_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9198{
9199 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9200 (__v16si) __Y, 1,
9201 (__mmask16) __M);
9202}
9203
d256b866
IT
9204extern __inline __mmask16
9205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9207{
9208 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9209 (__v16si) __Y, 1,
9210 (__mmask16) -1);
9211}
9212
275be1da
IT
9213extern __inline __mmask8
9214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9215_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9216{
9217 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9218 (__v8di) __Y, 1,
9219 (__mmask8) __M);
9220}
9221
d256b866
IT
9222extern __inline __mmask8
9223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9225{
9226 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9227 (__v8di) __Y, 1,
9228 (__mmask8) -1);
9229}
9230
275be1da
IT
9231extern __inline __mmask8
9232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9233_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9234{
9235 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9236 (__v8di) __Y, 1,
9237 (__mmask8) __M);
9238}
9239
d256b866
IT
9240extern __inline __mmask8
9241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9242_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9243{
9244 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9245 (__v8di) __Y, 1,
9246 (__mmask8) -1);
9247}
9248
9249extern __inline __mmask16
9250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9251_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9252{
9253 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9254 (__v16si) __Y, 4,
9255 (__mmask16) -1);
9256}
9257
275be1da
IT
9258extern __inline __mmask16
9259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9260_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9261{
9262 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9263 (__v16si) __Y, 4,
9264 (__mmask16) __M);
9265}
9266
9267extern __inline __mmask16
9268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9269_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9270{
9271 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9272 (__v16si) __Y, 4,
9273 (__mmask16) __M);
9274}
9275
d256b866
IT
9276extern __inline __mmask16
9277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9279{
9280 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9281 (__v16si) __Y, 4,
9282 (__mmask16) -1);
9283}
9284
275be1da
IT
9285extern __inline __mmask8
9286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9288{
9289 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9290 (__v8di) __Y, 4,
9291 (__mmask8) __M);
9292}
9293
d256b866
IT
9294extern __inline __mmask8
9295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9297{
9298 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9299 (__v8di) __Y, 4,
9300 (__mmask8) -1);
9301}
9302
275be1da
IT
9303extern __inline __mmask8
9304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9305_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9306{
9307 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9308 (__v8di) __Y, 4,
9309 (__mmask8) __M);
9310}
9311
d256b866
IT
9312extern __inline __mmask8
9313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9314_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9315{
9316 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9317 (__v8di) __Y, 4,
9318 (__mmask8) -1);
9319}
9320
756c5857
AI
/* Predicate immediates for the integer compare intrinsics.  The cmplt,
   cmple, cmpneq and cmpge wrappers in this header pass the literal
   values 1, 2, 4 and 5 respectively.  NLT/GE share one value, as do
   NLE/GT.  */
#define _MM_CMPINT_EQ      0x0
#define _MM_CMPINT_LT      0x1
#define _MM_CMPINT_LE      0x2
#define _MM_CMPINT_UNUSED  0x3
#define _MM_CMPINT_NE      0x4
#define _MM_CMPINT_NLT     0x5
#define _MM_CMPINT_GE      0x5
#define _MM_CMPINT_NLE     0x6
#define _MM_CMPINT_GT      0x6
9330
9331#ifdef __OPTIMIZE__
d8ea3e7c
AS
9332extern __inline __mmask16
9333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9334_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9335{
9336 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9337 (__mmask8) __B);
9338}
9339
9340extern __inline __mmask16
9341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9342_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9343{
9344 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9345 (__mmask8) __B);
9346}
9347
756c5857
AI
9348extern __inline __mmask8
9349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9350_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9351{
9352 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9353 (__v8di) __Y, __P,
9354 (__mmask8) -1);
9355}
9356
9357extern __inline __mmask16
9358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9359_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9360{
9361 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9362 (__v16si) __Y, __P,
9363 (__mmask16) -1);
9364}
9365
9366extern __inline __mmask8
9367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9368_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9369{
9370 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9371 (__v8di) __Y, __P,
9372 (__mmask8) -1);
9373}
9374
9375extern __inline __mmask16
9376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9377_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9378{
9379 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9380 (__v16si) __Y, __P,
9381 (__mmask16) -1);
9382}
9383
9384extern __inline __mmask8
9385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9386_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9387 const int __R)
9388{
9389 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9390 (__v8df) __Y, __P,
9391 (__mmask8) -1, __R);
9392}
9393
9394extern __inline __mmask16
9395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9396_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9397{
9398 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9399 (__v16sf) __Y, __P,
9400 (__mmask16) -1, __R);
9401}
9402
9403extern __inline __mmask8
9404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9406 const int __P)
9407{
9408 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9409 (__v8di) __Y, __P,
9410 (__mmask8) __U);
9411}
9412
9413extern __inline __mmask16
9414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9416 const int __P)
9417{
9418 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9419 (__v16si) __Y, __P,
9420 (__mmask16) __U);
9421}
9422
9423extern __inline __mmask8
9424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9426 const int __P)
9427{
9428 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9429 (__v8di) __Y, __P,
9430 (__mmask8) __U);
9431}
9432
9433extern __inline __mmask16
9434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9435_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9436 const int __P)
9437{
9438 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9439 (__v16si) __Y, __P,
9440 (__mmask16) __U);
9441}
9442
9443extern __inline __mmask8
9444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9446 const int __P, const int __R)
9447{
9448 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9449 (__v8df) __Y, __P,
9450 (__mmask8) __U, __R);
9451}
9452
9453extern __inline __mmask16
9454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9455_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9456 const int __P, const int __R)
9457{
9458 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9459 (__v16sf) __Y, __P,
9460 (__mmask16) __U, __R);
9461}
9462
9463extern __inline __mmask8
9464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9465_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9466{
9467 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9468 (__v2df) __Y, __P,
9469 (__mmask8) -1, __R);
9470}
9471
9472extern __inline __mmask8
9473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9475 const int __P, const int __R)
9476{
9477 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9478 (__v2df) __Y, __P,
9479 (__mmask8) __M, __R);
9480}
9481
9482extern __inline __mmask8
9483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9485{
9486 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9487 (__v4sf) __Y, __P,
9488 (__mmask8) -1, __R);
9489}
9490
9491extern __inline __mmask8
9492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9494 const int __P, const int __R)
9495{
9496 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9497 (__v4sf) __Y, __P,
9498 (__mmask8) __M, __R);
9499}
9500
9501#else
d8ea3e7c
AS
/* Macro form: X is cast to __mmask16 and Y to __mmask8 before being handed
   to __builtin_ia32_kshiftlihi; the result is cast to __mmask16.  */
#define _kshiftli_mask16(X, Y)                                  \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X),      \
                                          (__mmask8) (Y)))
9504
/* Macro form: X is cast to __mmask16 and Y to __mmask8 before being handed
   to __builtin_ia32_kshiftrihi; the result is cast to __mmask16.  */
#define _kshiftri_mask16(X, Y)                                  \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X),      \
                                          (__mmask8) (Y)))
9507
756c5857
AI
/* Macro form: expands to __builtin_ia32_cmpq512_mask on X and Y (as
   __v8di) with P and an all-ones __mmask8.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i) (X),       \
                                           (__v8di)(__m512i) (Y),       \
                                           (int) (P), (__mmask8) -1))
9512
/* Macro form: expands to __builtin_ia32_cmpd512_mask on X and Y (as
   __v16si) with P and an all-ones __mmask16.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i) (X),     \
                                            (__v16si)(__m512i) (Y),     \
                                            (int) (P), (__mmask16) -1))
756c5857
AI
9517
/* Macro form: expands to __builtin_ia32_ucmpq512_mask on X and Y (as
   __v8di) with P and an all-ones __mmask8.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i) (X),      \
                                            (__v8di)(__m512i) (Y),      \
                                            (int) (P), (__mmask8) -1))
9522
/* Macro form: expands to __builtin_ia32_ucmpd512_mask on X and Y (as
   __v16si) with P and an all-ones __mmask16.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i) (X),    \
                                             (__v16si)(__m512i) (Y),    \
                                             (int) (P), (__mmask16) -1))
756c5857 9527
/* Macro form: expands to __builtin_ia32_cmppd512_mask on X and Y (as
   __v8df) with P, an all-ones __mmask8 and R.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d) (X),      \
                                            (__v8df)(__m512d) (Y),      \
                                            (int) (P), (__mmask8) -1,   \
                                            (R)))
9532
/* Macro form: expands to __builtin_ia32_cmpps512_mask on X and Y (as
   __v16sf) with P, an all-ones __mmask16 and R.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512) (X),     \
                                             (__v16sf)(__m512) (Y),     \
                                             (int) (P), (__mmask16) -1, \
                                             (R)))
9537
/* Macro form: expands to __builtin_ia32_cmpq512_mask on X and Y (as
   __v8di) with P and mask M.
   M is parenthesized before the cast: with a bare `(__mmask8)M' an
   argument such as `m >> 8' had `m' truncated to 8 bits before the
   shift.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i) (X),       \
                                           (__v8di)(__m512i) (Y),       \
                                           (int) (P), (__mmask8) (M)))
9542
383321ec
UB
/* Macro form: expands to __builtin_ia32_cmpd512_mask on X and Y (as
   __v16si) with P and mask M.
   M is parenthesized before the cast: with a bare `(__mmask16)M' an
   argument such as `m >> 16' had `m' truncated to 16 bits before the
   shift.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i) (X),     \
                                            (__v16si)(__m512i) (Y),     \
                                            (int) (P), (__mmask16) (M)))
756c5857 9547
/* Macro form: expands to __builtin_ia32_ucmpq512_mask on X and Y (as
   __v8di) with P and mask M.
   M is parenthesized before the cast: with a bare `(__mmask8)M' an
   argument such as `m >> 8' had `m' truncated to 8 bits before the
   shift.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i) (X),      \
                                            (__v8di)(__m512i) (Y),      \
                                            (int) (P), (__mmask8) (M)))
9552
383321ec
UB
/* Macro form: expands to __builtin_ia32_ucmpd512_mask on X and Y (as
   __v16si) with P and mask M.
   M is parenthesized before the cast: with a bare `(__mmask16)M' an
   argument such as `m >> 16' had `m' truncated to 16 bits before the
   shift.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i) (X),    \
                                             (__v16si)(__m512i) (Y),    \
                                             (int) (P), (__mmask16) (M)))
756c5857 9557
/* Macro form: expands to __builtin_ia32_cmppd512_mask on X and Y (as
   __v8df) with P, mask M and R.
   M is parenthesized before the cast: with a bare `(__mmask8)M' an
   argument such as `m >> 8' had `m' truncated to 8 bits before the
   shift.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d) (X),      \
                                            (__v8df)(__m512d) (Y),      \
                                            (int) (P), (__mmask8) (M),  \
                                            (R)))
9562
/* Macro form: expands to __builtin_ia32_cmpps512_mask on X and Y (as
   __v16sf) with P, mask M and R.
   M is parenthesized before the cast: with a bare `(__mmask16)M' an
   argument such as `m >> 16' had `m' truncated to 16 bits before the
   shift.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512) (X),     \
                                             (__v16sf)(__m512) (Y),     \
                                             (int) (P), (__mmask16) (M), \
                                             (R)))
9567
/* Macro form: expands to __builtin_ia32_cmpsd_mask on X and Y (as __v2df)
   with P, an all-ones __mmask8 and R.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d) (X),         \
                                         (__v2df)(__m128d) (Y),         \
                                         (int) (P), (__mmask8) -1,      \
                                         (R)))
9572
/* Macro form: expands to __builtin_ia32_cmpsd_mask on X and Y (as __v2df)
   with P, mask M (passed without a cast) and R.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d) (X),         \
                                         (__v2df)(__m128d) (Y),         \
                                         (int) (P), (M), (R)))
9577
/* Macro form: expands to __builtin_ia32_cmpss_mask on X and Y (as __v4sf)
   with P, an all-ones __mmask8 and R.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128) (X),          \
                                         (__v4sf)(__m128) (Y),          \
                                         (int) (P), (__mmask8) -1,      \
                                         (R)))
9582
/* Macro form: expands to __builtin_ia32_cmpss_mask on X and Y (as __v4sf)
   with P, mask M (passed without a cast) and R.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128) (X),          \
                                         (__v4sf)(__m128) (Y),          \
                                         (int) (P), (M), (R)))
9587#endif
9588
9589#ifdef __OPTIMIZE__
9590extern __inline __m512
9591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9592_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9593{
b5fd0b71
JJ
9594 __m512 __v1_old = _mm512_undefined_ps ();
9595 __mmask16 __mask = 0xFFFF;
756c5857 9596
b5fd0b71 9597 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9598 __addr,
9599 (__v16si) __index,
b5fd0b71 9600 __mask, __scale);
756c5857
AI
9601}
9602
9603extern __inline __m512
9604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 9605_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 9606 __m512i __index, void const *__addr, int __scale)
756c5857 9607{
b5fd0b71 9608 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9609 __addr,
9610 (__v16si) __index,
9611 __mask, __scale);
9612}
9613
9614extern __inline __m512d
9615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9616_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 9617{
b5fd0b71
JJ
9618 __m512d __v1_old = _mm512_undefined_pd ();
9619 __mmask8 __mask = 0xFF;
756c5857 9620
b5fd0b71 9621 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 9622 __addr,
b5fd0b71 9623 (__v8si) __index, __mask,
756c5857
AI
9624 __scale);
9625}
9626
9627extern __inline __m512d
9628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9629_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 9630 __m256i __index, void const *__addr, int __scale)
756c5857
AI
9631{
9632 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9633 __addr,
9634 (__v8si) __index,
9635 __mask, __scale);
9636}
9637
9638extern __inline __m256
9639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9640_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9641{
b5fd0b71
JJ
9642 __m256 __v1_old = _mm256_undefined_ps ();
9643 __mmask8 __mask = 0xFF;
756c5857 9644
b5fd0b71 9645 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 9646 __addr,
b5fd0b71 9647 (__v8di) __index, __mask,
756c5857
AI
9648 __scale);
9649}
9650
9651extern __inline __m256
9652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9653_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 9654 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9655{
9656 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9657 __addr,
9658 (__v8di) __index,
9659 __mask, __scale);
9660}
9661
9662extern __inline __m512d
9663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9664_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 9665{
b5fd0b71
JJ
9666 __m512d __v1_old = _mm512_undefined_pd ();
9667 __mmask8 __mask = 0xFF;
756c5857 9668
b5fd0b71 9669 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 9670 __addr,
b5fd0b71 9671 (__v8di) __index, __mask,
756c5857
AI
9672 __scale);
9673}
9674
9675extern __inline __m512d
9676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9677_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 9678 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9679{
9680 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9681 __addr,
9682 (__v8di) __index,
9683 __mask, __scale);
9684}
9685
9686extern __inline __m512i
9687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9688_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 9689{
b5fd0b71
JJ
9690 __m512i __v1_old = _mm512_undefined_epi32 ();
9691 __mmask16 __mask = 0xFFFF;
756c5857 9692
b5fd0b71 9693 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
9694 __addr,
9695 (__v16si) __index,
b5fd0b71 9696 __mask, __scale);
756c5857
AI
9697}
9698
9699extern __inline __m512i
9700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9701_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 9702 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9703{
9704 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9705 __addr,
9706 (__v16si) __index,
9707 __mask, __scale);
9708}
9709
9710extern __inline __m512i
9711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9712_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 9713{
b5fd0b71
JJ
9714 __m512i __v1_old = _mm512_undefined_epi32 ();
9715 __mmask8 __mask = 0xFF;
756c5857 9716
b5fd0b71 9717 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 9718 __addr,
b5fd0b71 9719 (__v8si) __index, __mask,
756c5857
AI
9720 __scale);
9721}
9722
9723extern __inline __m512i
9724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 9726 __m256i __index, void const *__addr,
756c5857
AI
9727 int __scale)
9728{
9729 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9730 __addr,
9731 (__v8si) __index,
9732 __mask, __scale);
9733}
9734
9735extern __inline __m256i
9736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9737_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 9738{
b5fd0b71
JJ
9739 __m256i __v1_old = _mm256_undefined_si256 ();
9740 __mmask8 __mask = 0xFF;
756c5857 9741
b5fd0b71 9742 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
9743 __addr,
9744 (__v8di) __index,
b5fd0b71 9745 __mask, __scale);
756c5857
AI
9746}
9747
9748extern __inline __m256i
9749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 9751 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9752{
9753 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9754 __addr,
9755 (__v8di) __index,
9756 __mask, __scale);
9757}
9758
9759extern __inline __m512i
9760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9761_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 9762{
b5fd0b71
JJ
9763 __m512i __v1_old = _mm512_undefined_epi32 ();
9764 __mmask8 __mask = 0xFF;
756c5857 9765
b5fd0b71 9766 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 9767 __addr,
b5fd0b71 9768 (__v8di) __index, __mask,
756c5857
AI
9769 __scale);
9770}
9771
9772extern __inline __m512i
9773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 9775 __m512i __index, void const *__addr,
756c5857
AI
9776 int __scale)
9777{
9778 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9779 __addr,
9780 (__v8di) __index,
9781 __mask, __scale);
9782}
9783
9784extern __inline void
9785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9786_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
9787{
9788 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9789 (__v16si) __index, (__v16sf) __v1, __scale);
9790}
9791
9792extern __inline void
9793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9794_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
9795 __m512i __index, __m512 __v1, int __scale)
9796{
9797 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9798 (__v16sf) __v1, __scale);
9799}
9800
9801extern __inline void
9802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9803_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
9804 int __scale)
9805{
9806 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9807 (__v8si) __index, (__v8df) __v1, __scale);
9808}
9809
9810extern __inline void
9811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9812_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
9813 __m256i __index, __m512d __v1, int __scale)
9814{
9815 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9816 (__v8df) __v1, __scale);
9817}
9818
9819extern __inline void
9820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9821_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
9822{
9823 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9824 (__v8di) __index, (__v8sf) __v1, __scale);
9825}
9826
9827extern __inline void
9828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9829_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
9830 __m512i __index, __m256 __v1, int __scale)
9831{
9832 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9833 (__v8sf) __v1, __scale);
9834}
9835
9836extern __inline void
9837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9838_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
9839 int __scale)
9840{
9841 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9842 (__v8di) __index, (__v8df) __v1, __scale);
9843}
9844
9845extern __inline void
9846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9847_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
9848 __m512i __index, __m512d __v1, int __scale)
9849{
9850 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9851 (__v8df) __v1, __scale);
9852}
9853
9854extern __inline void
9855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9856_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
9857 __m512i __v1, int __scale)
9858{
9859 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9860 (__v16si) __index, (__v16si) __v1, __scale);
9861}
9862
9863extern __inline void
9864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9865_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
9866 __m512i __index, __m512i __v1, int __scale)
9867{
9868 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9869 (__v16si) __v1, __scale);
9870}
9871
9872extern __inline void
9873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9874_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
9875 __m512i __v1, int __scale)
9876{
9877 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9878 (__v8si) __index, (__v8di) __v1, __scale);
9879}
9880
9881extern __inline void
9882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9883_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
9884 __m256i __index, __m512i __v1, int __scale)
9885{
9886 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9887 (__v8di) __v1, __scale);
9888}
9889
9890extern __inline void
9891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9892_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
9893 __m256i __v1, int __scale)
9894{
9895 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9896 (__v8di) __index, (__v8si) __v1, __scale);
9897}
9898
9899extern __inline void
9900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9901_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
9902 __m512i __index, __m256i __v1, int __scale)
9903{
9904 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9905 (__v8si) __v1, __scale);
9906}
9907
9908extern __inline void
9909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9910_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
9911 __m512i __v1, int __scale)
9912{
9913 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9914 (__v8di) __index, (__v8di) __v1, __scale);
9915}
9916
9917extern __inline void
9918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9919_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
9920 __m512i __index, __m512i __v1, int __scale)
9921{
9922 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9923 (__v8di) __v1, __scale);
9924}
9925#else
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                             \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (void const *) (ADDR),            \
                                          (__v16si)(__m512i) (INDEX),       \
                                          (__mmask16) 0xFFFF,               \
                                          (int) (SCALE)))
9931
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 16' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)           \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),        \
                                          (void const *) (ADDR),            \
                                          (__v16si)(__m512i) (INDEX),       \
                                          (__mmask16) (MASK),               \
                                          (int) (SCALE)))
9937
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                             \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (),  \
                                          (void const *) (ADDR),            \
                                          (__v8si)(__m256i) (INDEX),        \
                                          (__mmask8) 0xFF,                  \
                                          (int) (SCALE)))
9943
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)           \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),        \
                                          (void const *) (ADDR),            \
                                          (__v8si)(__m256i) (INDEX),        \
                                          (__mmask8) (MASK),                \
                                          (int) (SCALE)))
9949
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                             \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (),  \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) 0xFF,                  \
                                          (int) (SCALE)))
9955
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)           \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),         \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) (MASK),                \
                                          (int) (SCALE)))
9961
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                             \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (),  \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) 0xFF,                  \
                                          (int) (SCALE)))
9967
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)           \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),        \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) (MASK),                \
                                          (int) (SCALE)))
9973
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                          \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)                        \
                                           _mm512_undefined_epi32 (),       \
                                           (void const *) (ADDR),           \
                                           (__v16si)(__m512i) (INDEX),      \
                                           (__mmask16) 0xFFFF,              \
                                           (int) (SCALE)))
9979
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 16' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)        \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),      \
                                           (void const *) (ADDR),           \
                                           (__v16si)(__m512i) (INDEX),      \
                                           (__mmask16) (MASK),              \
                                           (int) (SCALE)))
9985
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                          \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)                          \
                                          _mm512_undefined_epi32 (),        \
                                          (void const *) (ADDR),            \
                                          (__v8si)(__m256i) (INDEX),        \
                                          (__mmask8) 0xFF,                  \
                                          (int) (SCALE)))
9991
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)        \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),        \
                                          (void const *) (ADDR),            \
                                          (__v8si)(__m256i) (INDEX),        \
                                          (__mmask8) (MASK),                \
                                          (int) (SCALE)))
9997
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                          \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)                         \
                                           _mm256_undefined_si256 (),       \
                                           (void const *) (ADDR),           \
                                           (__v8di)(__m512i) (INDEX),       \
                                           (__mmask8) 0xFF,                 \
                                           (int) (SCALE)))
10003
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)        \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),       \
                                           (void const *) (ADDR),           \
                                           (__v8di)(__m512i) (INDEX),       \
                                           (__mmask8) (MASK),               \
                                           (int) (SCALE)))
10009
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void const *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  The
   whole expansion is parenthesized as well.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                          \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)                          \
                                          _mm512_undefined_epi32 (),        \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) 0xFF,                  \
                                          (int) (SCALE)))
10015
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  The whole expansion is parenthesized too.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)        \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),        \
                                          (void const *) (ADDR),            \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8) (MASK),                \
                                          (int) (SCALE)))
10021
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) 0xFFFF,   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10026
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 16' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10031
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) 0xFF,       \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10036
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10041
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) 0xFF,      \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10046
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  MASK is cast to __mmask8 (it was __mmask16),
   the mask type taken by the inline-function form of this intrinsic.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10051
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) 0xFF,       \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10056
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10061
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) 0xFFFF,   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10066
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 16' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK),   \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10071
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) 0xFF,       \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10076
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10081
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) 0xFF,      \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10086
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10091
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast: with a bare
   `(void *)ADDR', an argument such as `p + 1' became byte-wise
   void-pointer arithmetic instead of arithmetic in p's own type.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) 0xFF,       \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10096
/* Macro form used when __OPTIMIZE__ is not defined.
   Each argument is parenthesized before it is cast, so that compound
   expressions (e.g. `p + 1' for ADDR, `m >> 8' for MASK) are evaluated
   in their own type first.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10101#endif
10102
10103extern __inline __m512d
10104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10106{
10107 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10108 (__v8df) __W,
10109 (__mmask8) __U);
10110}
10111
10112extern __inline __m512d
10113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10114_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10115{
10116 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10117 (__v8df)
10118 _mm512_setzero_pd (),
10119 (__mmask8) __U);
10120}
10121
10122extern __inline void
10123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10124_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10125{
10126 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10127 (__mmask8) __U);
10128}
10129
10130extern __inline __m512
10131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10133{
10134 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10135 (__v16sf) __W,
10136 (__mmask16) __U);
10137}
10138
10139extern __inline __m512
10140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10141_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10142{
10143 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10144 (__v16sf)
10145 _mm512_setzero_ps (),
10146 (__mmask16) __U);
10147}
10148
10149extern __inline void
10150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10151_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10152{
10153 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10154 (__mmask16) __U);
10155}
10156
10157extern __inline __m512i
10158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10159_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10160{
10161 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10162 (__v8di) __W,
10163 (__mmask8) __U);
10164}
10165
10166extern __inline __m512i
10167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10168_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10169{
10170 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10171 (__v8di)
10172 _mm512_setzero_si512 (),
10173 (__mmask8) __U);
10174}
10175
10176extern __inline void
10177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10179{
10180 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10181 (__mmask8) __U);
10182}
10183
10184extern __inline __m512i
10185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10187{
10188 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10189 (__v16si) __W,
10190 (__mmask16) __U);
10191}
10192
10193extern __inline __m512i
10194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10196{
10197 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10198 (__v16si)
10199 _mm512_setzero_si512 (),
10200 (__mmask16) __U);
10201}
10202
10203extern __inline void
10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10206{
10207 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10208 (__mmask16) __U);
10209}
10210
10211extern __inline __m512d
10212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10213_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10214{
10215 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10216 (__v8df) __W,
10217 (__mmask8) __U);
10218}
10219
10220extern __inline __m512d
10221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10222_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10223{
10224 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10225 (__v8df)
10226 _mm512_setzero_pd (),
10227 (__mmask8) __U);
10228}
10229
10230extern __inline __m512d
10231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10233{
10234 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10235 (__v8df) __W,
10236 (__mmask8) __U);
10237}
10238
10239extern __inline __m512d
10240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10242{
10243 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10244 (__v8df)
10245 _mm512_setzero_pd (),
10246 (__mmask8) __U);
10247}
10248
10249extern __inline __m512
10250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10251_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10252{
10253 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10254 (__v16sf) __W,
10255 (__mmask16) __U);
10256}
10257
10258extern __inline __m512
10259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10260_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10261{
10262 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10263 (__v16sf)
10264 _mm512_setzero_ps (),
10265 (__mmask16) __U);
10266}
10267
10268extern __inline __m512
10269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10270_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10271{
10272 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10273 (__v16sf) __W,
10274 (__mmask16) __U);
10275}
10276
10277extern __inline __m512
10278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10279_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10280{
10281 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10282 (__v16sf)
10283 _mm512_setzero_ps (),
10284 (__mmask16) __U);
10285}
10286
10287extern __inline __m512i
10288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10290{
10291 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10292 (__v8di) __W,
10293 (__mmask8) __U);
10294}
10295
10296extern __inline __m512i
10297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10299{
10300 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10301 (__v8di)
10302 _mm512_setzero_si512 (),
10303 (__mmask8) __U);
10304}
10305
10306extern __inline __m512i
10307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10308_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10309{
10310 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10311 (__v8di) __W,
10312 (__mmask8) __U);
10313}
10314
10315extern __inline __m512i
10316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10318{
10319 return (__m512i)
10320 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10321 (__v8di)
10322 _mm512_setzero_si512 (),
10323 (__mmask8) __U);
10324}
10325
10326extern __inline __m512i
10327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10328_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10329{
10330 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10331 (__v16si) __W,
10332 (__mmask16) __U);
10333}
10334
10335extern __inline __m512i
10336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10338{
10339 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10340 (__v16si)
10341 _mm512_setzero_si512 (),
10342 (__mmask16) __U);
10343}
10344
10345extern __inline __m512i
10346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10347_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10348{
10349 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10350 (__v16si) __W,
10351 (__mmask16) __U);
10352}
10353
10354extern __inline __m512i
10355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10356_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10357{
10358 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10359 (__v16si)
10360 _mm512_setzero_si512
10361 (), (__mmask16) __U);
10362}
10363
/* Mask arithmetic operations.  The _k*_mask16 spellings below are plain
   aliases for the corresponding _mm512_k* functions.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
10371
dea06111
AS
10372extern __inline unsigned char
10373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10375{
10376 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10377 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10378}
10379
10380extern __inline unsigned char
10381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10382_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10383{
10384 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10385 (__mmask16) __B);
10386}
10387
10388extern __inline unsigned char
10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10391{
10392 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10393 (__mmask16) __B);
10394}
10395
7cdb6e4c
AS
10396extern __inline unsigned int
10397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398_cvtmask16_u32 (__mmask16 __A)
10399{
10400 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10401}
10402
10403extern __inline __mmask16
10404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10405_cvtu32_mask16 (unsigned int __A)
10406{
10407 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10408}
10409
10410extern __inline __mmask16
10411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10412_load_mask16 (__mmask16 *__A)
10413{
10414 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10415}
10416
10417extern __inline void
10418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419_store_mask16 (__mmask16 *__A, __mmask16 __B)
10420{
10421 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10422}
10423
756c5857
AI
10424extern __inline __mmask16
10425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10426_mm512_kand (__mmask16 __A, __mmask16 __B)
10427{
10428 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10429}
10430
10431extern __inline __mmask16
10432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10433_mm512_kandn (__mmask16 __A, __mmask16 __B)
10434{
6901ea62
AS
10435 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10436 (__mmask16) __B);
756c5857
AI
10437}
10438
10439extern __inline __mmask16
10440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441_mm512_kor (__mmask16 __A, __mmask16 __B)
10442{
10443 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10444}
10445
10446extern __inline int
10447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10449{
10450 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10451 (__mmask16) __B);
10452}
10453
10454extern __inline int
10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10456_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10457{
10458 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10459 (__mmask16) __B);
10460}
10461
10462extern __inline __mmask16
10463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10464_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10465{
10466 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10467}
10468
10469extern __inline __mmask16
10470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471_mm512_kxor (__mmask16 __A, __mmask16 __B)
10472{
10473 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10474}
10475
10476extern __inline __mmask16
10477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10478_mm512_knot (__mmask16 __A)
10479{
10480 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10481}
10482
10483extern __inline __mmask16
10484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10485_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10486{
10487 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10488}
10489
6901ea62
AS
10490extern __inline __mmask16
10491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10492_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10493{
10494 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10495}
10496
756c5857
AI
/* Masked insertion of a 128-bit vector into a 512-bit vector via
   __builtin_ia32_insert{i,f}32x4_mask.  The maskz forms pass an all-zero
   vector as the pass-through operand, the mask forms pass __A.  The
   immediate is forwarded unchanged to the builtin.  When __OPTIMIZE__ is
   not defined the intrinsics are provided as macros instead of inline
   functions.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
                          const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
                                                    (__v4si) __D,
                                                    __imm,
                                                    (__v16si)
                                                    _mm512_setzero_si512 (),
                                                    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
                          const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
                                                   (__v4sf) __D,
                                                   __imm,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (), __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
                         __m128i __D, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
                                                    (__v4si) __D,
                                                    __imm,
                                                    (__v16si) __A,
                                                    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
                         __m128 __D, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
                                                   (__v4sf) __D,
                                                   __imm,
                                                   (__v16sf) __A, __B);
}
#else
/* The write mask is converted to __mmask16, the same type the inline
   definitions above take.  Converting it to __mmask8 would drop the
   upper eight mask bits and make the macro forms disagree with the
   inline forms.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
#endif
10566
10567extern __inline __m512i
10568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10569_mm512_max_epi64 (__m512i __A, __m512i __B)
10570{
10571 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10572 (__v8di) __B,
10573 (__v8di)
4271e5cb 10574 _mm512_undefined_epi32 (),
756c5857
AI
10575 (__mmask8) -1);
10576}
10577
10578extern __inline __m512i
10579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10580_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10581{
10582 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10583 (__v8di) __B,
10584 (__v8di)
10585 _mm512_setzero_si512 (),
10586 __M);
10587}
10588
10589extern __inline __m512i
10590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10592{
10593 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10594 (__v8di) __B,
10595 (__v8di) __W, __M);
10596}
10597
10598extern __inline __m512i
10599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10600_mm512_min_epi64 (__m512i __A, __m512i __B)
10601{
10602 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10603 (__v8di) __B,
10604 (__v8di)
4271e5cb 10605 _mm512_undefined_epi32 (),
756c5857
AI
10606 (__mmask8) -1);
10607}
10608
10609extern __inline __m512i
10610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10612{
10613 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10614 (__v8di) __B,
10615 (__v8di) __W, __M);
10616}
10617
10618extern __inline __m512i
10619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10621{
10622 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10623 (__v8di) __B,
10624 (__v8di)
10625 _mm512_setzero_si512 (),
10626 __M);
10627}
10628
10629extern __inline __m512i
10630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631_mm512_max_epu64 (__m512i __A, __m512i __B)
10632{
10633 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10634 (__v8di) __B,
10635 (__v8di)
4271e5cb 10636 _mm512_undefined_epi32 (),
756c5857
AI
10637 (__mmask8) -1);
10638}
10639
10640extern __inline __m512i
10641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10642_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10643{
10644 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10645 (__v8di) __B,
10646 (__v8di)
10647 _mm512_setzero_si512 (),
10648 __M);
10649}
10650
10651extern __inline __m512i
10652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10653_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10654{
10655 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10656 (__v8di) __B,
10657 (__v8di) __W, __M);
10658}
10659
10660extern __inline __m512i
10661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10662_mm512_min_epu64 (__m512i __A, __m512i __B)
10663{
10664 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10665 (__v8di) __B,
10666 (__v8di)
4271e5cb 10667 _mm512_undefined_epi32 (),
756c5857
AI
10668 (__mmask8) -1);
10669}
10670
10671extern __inline __m512i
10672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10673_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10674{
10675 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10676 (__v8di) __B,
10677 (__v8di) __W, __M);
10678}
10679
10680extern __inline __m512i
10681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10682_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10683{
10684 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10685 (__v8di) __B,
10686 (__v8di)
10687 _mm512_setzero_si512 (),
10688 __M);
10689}
10690
10691extern __inline __m512i
10692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693_mm512_max_epi32 (__m512i __A, __m512i __B)
10694{
10695 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10696 (__v16si) __B,
10697 (__v16si)
4271e5cb 10698 _mm512_undefined_epi32 (),
756c5857
AI
10699 (__mmask16) -1);
10700}
10701
10702extern __inline __m512i
10703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10705{
10706 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10707 (__v16si) __B,
10708 (__v16si)
10709 _mm512_setzero_si512 (),
10710 __M);
10711}
10712
10713extern __inline __m512i
10714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10715_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10716{
10717 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10718 (__v16si) __B,
10719 (__v16si) __W, __M);
10720}
10721
10722extern __inline __m512i
10723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724_mm512_min_epi32 (__m512i __A, __m512i __B)
10725{
10726 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10727 (__v16si) __B,
10728 (__v16si)
4271e5cb 10729 _mm512_undefined_epi32 (),
756c5857
AI
10730 (__mmask16) -1);
10731}
10732
10733extern __inline __m512i
10734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10735_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10736{
10737 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10738 (__v16si) __B,
10739 (__v16si)
10740 _mm512_setzero_si512 (),
10741 __M);
10742}
10743
10744extern __inline __m512i
10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10747{
10748 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10749 (__v16si) __B,
10750 (__v16si) __W, __M);
10751}
10752
10753extern __inline __m512i
10754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10755_mm512_max_epu32 (__m512i __A, __m512i __B)
10756{
10757 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10758 (__v16si) __B,
10759 (__v16si)
4271e5cb 10760 _mm512_undefined_epi32 (),
756c5857
AI
10761 (__mmask16) -1);
10762}
10763
10764extern __inline __m512i
10765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10766_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10767{
10768 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10769 (__v16si) __B,
10770 (__v16si)
10771 _mm512_setzero_si512 (),
10772 __M);
10773}
10774
10775extern __inline __m512i
10776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10778{
10779 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10780 (__v16si) __B,
10781 (__v16si) __W, __M);
10782}
10783
10784extern __inline __m512i
10785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786_mm512_min_epu32 (__m512i __A, __m512i __B)
10787{
10788 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10789 (__v16si) __B,
10790 (__v16si)
4271e5cb 10791 _mm512_undefined_epi32 (),
756c5857
AI
10792 (__mmask16) -1);
10793}
10794
10795extern __inline __m512i
10796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10798{
10799 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10800 (__v16si) __B,
10801 (__v16si)
10802 _mm512_setzero_si512 (),
10803 __M);
10804}
10805
10806extern __inline __m512i
10807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10808_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10809{
10810 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10811 (__v16si) __B,
10812 (__v16si) __W, __M);
10813}
10814
10815extern __inline __m512
10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10818{
10819 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10820 (__v16sf) __B,
10821 (__v16sf)
0b192937 10822 _mm512_undefined_ps (),
756c5857
AI
10823 (__mmask16) -1);
10824}
10825
10826extern __inline __m512
10827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10828_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10829{
10830 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10831 (__v16sf) __B,
10832 (__v16sf) __W,
10833 (__mmask16) __U);
10834}
10835
10836extern __inline __m512
10837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10839{
10840 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10841 (__v16sf) __B,
10842 (__v16sf)
10843 _mm512_setzero_ps (),
10844 (__mmask16) __U);
10845}
10846
075691af
AI
/* Scalar max/min with an explicit rounding/exception-control argument.
   The plain forms call __builtin_ia32_{max,min}{sd,ss}_round; the mask
   and maskz forms call the matching *_mask_round builtin, passing __W
   or an all-zero vector as the third operand, followed by the mask and
   the __R argument.  When __OPTIMIZE__ is not defined the intrinsics
   are provided as macros instead of inline functions.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
/* Each expansion is wrapped in parentheses so that it behaves as a
   single primary expression: without them a postfix operator written
   after the macro invocation would bind to the builtin call before the
   leading cast is applied.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_maxsd_round(A, B, C))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C))

#define _mm_maskz_max_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_maxss_round(A, B, C))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C))

#define _mm_maskz_max_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_minsd_round(A, B, C))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C))

#define _mm_maskz_min_round_sd(U, A, B, C)   \
    ((__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_minss_round(A, B, C))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C))

#define _mm_maskz_min_round_ss(U, A, B, C)   \
    ((__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C))

#endif
11014
756c5857
AI
11015extern __inline __m512d
11016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11017_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11018{
11019 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11020 (__v8df) __W,
11021 (__mmask8) __U);
11022}
11023
11024extern __inline __m512
11025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11026_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11027{
11028 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11029 (__v16sf) __W,
11030 (__mmask16) __U);
11031}
11032
11033extern __inline __m512i
11034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11036{
11037 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11038 (__v8di) __W,
11039 (__mmask8) __U);
11040}
11041
11042extern __inline __m512i
11043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11044_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11045{
11046 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11047 (__v16si) __W,
11048 (__mmask16) __U);
11049}
11050
075691af
AI
/* Scalar fused multiply-add family with an explicit rounding argument.
   Every variant is expressed through __builtin_ia32_vfmadd{sd,ss}3_round:
   fmsub negates the third operand, fnmadd negates the second, and fnmsub
   negates both.  When __OPTIMIZE__ is not defined the intrinsics are
   provided as macros instead of inline functions.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   (__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  (__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   -(__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  -(__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   (__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  (__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   -(__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  -(__v4sf) __B,
                                                  __R);
}
#else
/* Each expansion is wrapped in parentheses so that it behaves as a
   single primary expression: without them a postfix operator written
   after the macro invocation would bind to the builtin call before the
   leading cast is applied.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R))
#endif
11156
756c5857
AI
/* Scalar compare returning int: __A and __B are forwarded together with
   the two integer arguments __P and __R to __builtin_ia32_vcomiss /
   __builtin_ia32_vcomisd.  When __OPTIMIZE__ is not defined the
   intrinsics are provided as macros instead of inline functions.  */
#ifdef __OPTIMIZE__
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  const __v4sf __X = (__v4sf) __A;
  const __v4sf __Y = (__v4sf) __B;

  return __builtin_ia32_vcomiss (__X, __Y, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  const __v2df __X = (__v2df) __A;
  const __v2df __Y = (__v2df) __B;

  return __builtin_ia32_vcomisd (__X, __Y, __P, __R);
}
#else
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss (A, B, C, D))
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd (A, B, C, D))
#endif
11177
11178extern __inline __m512d
11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180_mm512_sqrt_pd (__m512d __A)
11181{
11182 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11183 (__v8df)
0b192937 11184 _mm512_undefined_pd (),
756c5857
AI
11185 (__mmask8) -1,
11186 _MM_FROUND_CUR_DIRECTION);
11187}
11188
11189extern __inline __m512d
11190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11192{
11193 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11194 (__v8df) __W,
11195 (__mmask8) __U,
11196 _MM_FROUND_CUR_DIRECTION);
11197}
11198
11199extern __inline __m512d
11200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11201_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11202{
11203 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11204 (__v8df)
11205 _mm512_setzero_pd (),
11206 (__mmask8) __U,
11207 _MM_FROUND_CUR_DIRECTION);
11208}
11209
11210extern __inline __m512
11211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11212_mm512_sqrt_ps (__m512 __A)
11213{
11214 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11215 (__v16sf)
0b192937 11216 _mm512_undefined_ps (),
756c5857
AI
11217 (__mmask16) -1,
11218 _MM_FROUND_CUR_DIRECTION);
11219}
11220
11221extern __inline __m512
11222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11223_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11224{
11225 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11226 (__v16sf) __W,
11227 (__mmask16) __U,
11228 _MM_FROUND_CUR_DIRECTION);
11229}
11230
11231extern __inline __m512
11232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11233_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11234{
11235 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11236 (__v16sf)
11237 _mm512_setzero_ps (),
11238 (__mmask16) __U,
11239 _MM_FROUND_CUR_DIRECTION);
11240}
11241
11242extern __inline __m512d
11243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11244_mm512_add_pd (__m512d __A, __m512d __B)
11245{
2069d6fc 11246 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
11247}
11248
11249extern __inline __m512d
11250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11251_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11252{
11253 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11254 (__v8df) __B,
11255 (__v8df) __W,
11256 (__mmask8) __U,
11257 _MM_FROUND_CUR_DIRECTION);
11258}
11259
11260extern __inline __m512d
11261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11262_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11263{
11264 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11265 (__v8df) __B,
11266 (__v8df)
11267 _mm512_setzero_pd (),
11268 (__mmask8) __U,
11269 _MM_FROUND_CUR_DIRECTION);
11270}
11271
11272extern __inline __m512
11273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11274_mm512_add_ps (__m512 __A, __m512 __B)
11275{
2069d6fc 11276 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
11277}
11278
11279extern __inline __m512
11280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11282{
11283 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11284 (__v16sf) __B,
11285 (__v16sf) __W,
11286 (__mmask16) __U,
11287 _MM_FROUND_CUR_DIRECTION);
11288}
11289
11290extern __inline __m512
11291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11292_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11293{
11294 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11295 (__v16sf) __B,
11296 (__v16sf)
11297 _mm512_setzero_ps (),
11298 (__mmask16) __U,
11299 _MM_FROUND_CUR_DIRECTION);
11300}
11301
1853f5c7
SP
11302extern __inline __m128d
11303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11304_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11305{
11306 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11307 (__v2df) __B,
11308 (__v2df) __W,
11309 (__mmask8) __U,
11310 _MM_FROUND_CUR_DIRECTION);
11311}
11312
11313extern __inline __m128d
11314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11315_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11316{
11317 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11318 (__v2df) __B,
11319 (__v2df)
11320 _mm_setzero_pd (),
11321 (__mmask8) __U,
11322 _MM_FROUND_CUR_DIRECTION);
11323}
11324
11325extern __inline __m128
11326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11328{
11329 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11330 (__v4sf) __B,
11331 (__v4sf) __W,
11332 (__mmask8) __U,
11333 _MM_FROUND_CUR_DIRECTION);
11334}
11335
11336extern __inline __m128
11337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11338_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11339{
11340 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11341 (__v4sf) __B,
11342 (__v4sf)
11343 _mm_setzero_ps (),
11344 (__mmask8) __U,
11345 _MM_FROUND_CUR_DIRECTION);
11346}
11347
756c5857
AI
11348extern __inline __m512d
11349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350_mm512_sub_pd (__m512d __A, __m512d __B)
11351{
2069d6fc 11352 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
11353}
11354
11355extern __inline __m512d
11356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11358{
11359 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11360 (__v8df) __B,
11361 (__v8df) __W,
11362 (__mmask8) __U,
11363 _MM_FROUND_CUR_DIRECTION);
11364}
11365
11366extern __inline __m512d
11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11369{
11370 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11371 (__v8df) __B,
11372 (__v8df)
11373 _mm512_setzero_pd (),
11374 (__mmask8) __U,
11375 _MM_FROUND_CUR_DIRECTION);
11376}
11377
11378extern __inline __m512
11379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380_mm512_sub_ps (__m512 __A, __m512 __B)
11381{
2069d6fc 11382 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
11383}
11384
11385extern __inline __m512
11386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11388{
11389 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11390 (__v16sf) __B,
11391 (__v16sf) __W,
11392 (__mmask16) __U,
11393 _MM_FROUND_CUR_DIRECTION);
11394}
11395
11396extern __inline __m512
11397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11398_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11399{
11400 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11401 (__v16sf) __B,
11402 (__v16sf)
11403 _mm512_setzero_ps (),
11404 (__mmask16) __U,
11405 _MM_FROUND_CUR_DIRECTION);
11406}
11407
1853f5c7
SP
11408extern __inline __m128d
11409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11411{
11412 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11413 (__v2df) __B,
11414 (__v2df) __W,
11415 (__mmask8) __U,
11416 _MM_FROUND_CUR_DIRECTION);
11417}
11418
11419extern __inline __m128d
11420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11422{
11423 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11424 (__v2df) __B,
11425 (__v2df)
11426 _mm_setzero_pd (),
11427 (__mmask8) __U,
11428 _MM_FROUND_CUR_DIRECTION);
11429}
11430
11431extern __inline __m128
11432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11434{
11435 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11436 (__v4sf) __B,
11437 (__v4sf) __W,
11438 (__mmask8) __U,
11439 _MM_FROUND_CUR_DIRECTION);
11440}
11441
11442extern __inline __m128
11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11445{
11446 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11447 (__v4sf) __B,
11448 (__v4sf)
11449 _mm_setzero_ps (),
11450 (__mmask8) __U,
11451 _MM_FROUND_CUR_DIRECTION);
11452}
11453
756c5857
AI
11454extern __inline __m512d
11455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456_mm512_mul_pd (__m512d __A, __m512d __B)
11457{
2069d6fc 11458 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
11459}
11460
11461extern __inline __m512d
11462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11463_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11464{
11465 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11466 (__v8df) __B,
11467 (__v8df) __W,
11468 (__mmask8) __U,
11469 _MM_FROUND_CUR_DIRECTION);
11470}
11471
11472extern __inline __m512d
11473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11474_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11475{
11476 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11477 (__v8df) __B,
11478 (__v8df)
11479 _mm512_setzero_pd (),
11480 (__mmask8) __U,
11481 _MM_FROUND_CUR_DIRECTION);
11482}
11483
11484extern __inline __m512
11485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486_mm512_mul_ps (__m512 __A, __m512 __B)
11487{
2069d6fc 11488 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
11489}
11490
11491extern __inline __m512
11492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11493_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11494{
11495 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11496 (__v16sf) __B,
11497 (__v16sf) __W,
11498 (__mmask16) __U,
11499 _MM_FROUND_CUR_DIRECTION);
11500}
11501
11502extern __inline __m512
11503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11505{
11506 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11507 (__v16sf) __B,
11508 (__v16sf)
11509 _mm512_setzero_ps (),
11510 (__mmask16) __U,
11511 _MM_FROUND_CUR_DIRECTION);
11512}
11513
f4ee3a9e
UB
11514extern __inline __m128d
11515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11516_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11517 __m128d __B)
11518{
11519 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11520 (__v2df) __B,
11521 (__v2df) __W,
11522 (__mmask8) __U,
11523 _MM_FROUND_CUR_DIRECTION);
11524}
11525
11526extern __inline __m128d
11527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11528_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11529{
11530 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11531 (__v2df) __B,
11532 (__v2df)
11533 _mm_setzero_pd (),
11534 (__mmask8) __U,
11535 _MM_FROUND_CUR_DIRECTION);
11536}
11537
11538extern __inline __m128
11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11541 __m128 __B)
11542{
11543 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11544 (__v4sf) __B,
11545 (__v4sf) __W,
11546 (__mmask8) __U,
11547 _MM_FROUND_CUR_DIRECTION);
11548}
11549
11550extern __inline __m128
11551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11553{
11554 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11555 (__v4sf) __B,
11556 (__v4sf)
11557 _mm_setzero_ps (),
11558 (__mmask8) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11560}
11561
756c5857
AI
11562extern __inline __m512d
11563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564_mm512_div_pd (__m512d __M, __m512d __V)
11565{
2069d6fc 11566 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
11567}
11568
11569extern __inline __m512d
11570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11571_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11572{
11573 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11574 (__v8df) __V,
11575 (__v8df) __W,
11576 (__mmask8) __U,
11577 _MM_FROUND_CUR_DIRECTION);
11578}
11579
11580extern __inline __m512d
11581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11583{
11584 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11585 (__v8df) __V,
11586 (__v8df)
11587 _mm512_setzero_pd (),
11588 (__mmask8) __U,
11589 _MM_FROUND_CUR_DIRECTION);
11590}
11591
11592extern __inline __m512
11593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594_mm512_div_ps (__m512 __A, __m512 __B)
11595{
2069d6fc 11596 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
11597}
11598
11599extern __inline __m512
11600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11601_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11602{
11603 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11604 (__v16sf) __B,
11605 (__v16sf) __W,
11606 (__mmask16) __U,
11607 _MM_FROUND_CUR_DIRECTION);
11608}
11609
11610extern __inline __m512
11611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11613{
11614 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11615 (__v16sf) __B,
11616 (__v16sf)
11617 _mm512_setzero_ps (),
11618 (__mmask16) __U,
11619 _MM_FROUND_CUR_DIRECTION);
11620}
11621
f4ee3a9e
UB
11622extern __inline __m128d
11623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11624_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11625 __m128d __B)
11626{
11627 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11628 (__v2df) __B,
11629 (__v2df) __W,
11630 (__mmask8) __U,
11631 _MM_FROUND_CUR_DIRECTION);
11632}
11633
11634extern __inline __m128d
11635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11636_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11637{
11638 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11639 (__v2df) __B,
11640 (__v2df)
11641 _mm_setzero_pd (),
11642 (__mmask8) __U,
11643 _MM_FROUND_CUR_DIRECTION);
11644}
11645
11646extern __inline __m128
11647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11648_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11649 __m128 __B)
11650{
11651 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11652 (__v4sf) __B,
11653 (__v4sf) __W,
11654 (__mmask8) __U,
11655 _MM_FROUND_CUR_DIRECTION);
11656}
11657
11658extern __inline __m128
11659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11660_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11661{
11662 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11663 (__v4sf) __B,
11664 (__v4sf)
11665 _mm_setzero_ps (),
11666 (__mmask8) __U,
11667 _MM_FROUND_CUR_DIRECTION);
11668}
11669
756c5857
AI
11670extern __inline __m512d
11671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672_mm512_max_pd (__m512d __A, __m512d __B)
11673{
11674 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11675 (__v8df) __B,
11676 (__v8df)
0b192937 11677 _mm512_undefined_pd (),
756c5857
AI
11678 (__mmask8) -1,
11679 _MM_FROUND_CUR_DIRECTION);
11680}
11681
11682extern __inline __m512d
11683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11684_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11685{
11686 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11687 (__v8df) __B,
11688 (__v8df) __W,
11689 (__mmask8) __U,
11690 _MM_FROUND_CUR_DIRECTION);
11691}
11692
11693extern __inline __m512d
11694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11695_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11696{
11697 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11698 (__v8df) __B,
11699 (__v8df)
11700 _mm512_setzero_pd (),
11701 (__mmask8) __U,
11702 _MM_FROUND_CUR_DIRECTION);
11703}
11704
11705extern __inline __m512
11706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707_mm512_max_ps (__m512 __A, __m512 __B)
11708{
11709 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11710 (__v16sf) __B,
11711 (__v16sf)
0b192937 11712 _mm512_undefined_ps (),
756c5857
AI
11713 (__mmask16) -1,
11714 _MM_FROUND_CUR_DIRECTION);
11715}
11716
11717extern __inline __m512
11718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11719_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11720{
11721 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11722 (__v16sf) __B,
11723 (__v16sf) __W,
11724 (__mmask16) __U,
11725 _MM_FROUND_CUR_DIRECTION);
11726}
11727
11728extern __inline __m512
11729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11730_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11731{
11732 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11733 (__v16sf) __B,
11734 (__v16sf)
11735 _mm512_setzero_ps (),
11736 (__mmask16) __U,
11737 _MM_FROUND_CUR_DIRECTION);
11738}
11739
dc7401c0
SP
11740extern __inline __m128d
11741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11742_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11743{
11744 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11745 (__v2df) __B,
11746 (__v2df) __W,
11747 (__mmask8) __U,
11748 _MM_FROUND_CUR_DIRECTION);
11749}
11750
11751extern __inline __m128d
11752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
11754{
11755 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11756 (__v2df) __B,
11757 (__v2df)
11758 _mm_setzero_pd (),
11759 (__mmask8) __U,
11760 _MM_FROUND_CUR_DIRECTION);
11761}
11762
11763extern __inline __m128
11764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11765_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11766{
11767 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11768 (__v4sf) __B,
11769 (__v4sf) __W,
11770 (__mmask8) __U,
11771 _MM_FROUND_CUR_DIRECTION);
11772}
11773
11774extern __inline __m128
11775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
11777{
11778 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11779 (__v4sf) __B,
11780 (__v4sf)
11781 _mm_setzero_ps (),
11782 (__mmask8) __U,
11783 _MM_FROUND_CUR_DIRECTION);
11784}
11785
756c5857
AI
11786extern __inline __m512d
11787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11788_mm512_min_pd (__m512d __A, __m512d __B)
11789{
11790 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11791 (__v8df) __B,
11792 (__v8df)
0b192937 11793 _mm512_undefined_pd (),
756c5857
AI
11794 (__mmask8) -1,
11795 _MM_FROUND_CUR_DIRECTION);
11796}
11797
11798extern __inline __m512d
11799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11801{
11802 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11803 (__v8df) __B,
11804 (__v8df) __W,
11805 (__mmask8) __U,
11806 _MM_FROUND_CUR_DIRECTION);
11807}
11808
11809extern __inline __m512d
11810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11811_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11812{
11813 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11814 (__v8df) __B,
11815 (__v8df)
11816 _mm512_setzero_pd (),
11817 (__mmask8) __U,
11818 _MM_FROUND_CUR_DIRECTION);
11819}
11820
11821extern __inline __m512
11822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823_mm512_min_ps (__m512 __A, __m512 __B)
11824{
11825 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11826 (__v16sf) __B,
11827 (__v16sf)
0b192937 11828 _mm512_undefined_ps (),
756c5857
AI
11829 (__mmask16) -1,
11830 _MM_FROUND_CUR_DIRECTION);
11831}
11832
11833extern __inline __m512
11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11836{
11837 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11838 (__v16sf) __B,
11839 (__v16sf) __W,
11840 (__mmask16) __U,
11841 _MM_FROUND_CUR_DIRECTION);
11842}
11843
11844extern __inline __m512
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11847{
11848 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11849 (__v16sf) __B,
11850 (__v16sf)
11851 _mm512_setzero_ps (),
11852 (__mmask16) __U,
11853 _MM_FROUND_CUR_DIRECTION);
11854}
11855
dc7401c0
SP
11856extern __inline __m128d
11857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11859{
11860 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11861 (__v2df) __B,
11862 (__v2df) __W,
11863 (__mmask8) __U,
11864 _MM_FROUND_CUR_DIRECTION);
11865}
11866
11867extern __inline __m128d
11868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11869_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
11870{
11871 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11872 (__v2df) __B,
11873 (__v2df)
11874 _mm_setzero_pd (),
11875 (__mmask8) __U,
11876 _MM_FROUND_CUR_DIRECTION);
11877}
11878
11879extern __inline __m128
11880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11881_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11882{
11883 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11884 (__v4sf) __B,
11885 (__v4sf) __W,
11886 (__mmask8) __U,
11887 _MM_FROUND_CUR_DIRECTION);
11888}
11889
11890extern __inline __m128
11891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11892_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
11893{
11894 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11895 (__v4sf) __B,
11896 (__v4sf)
11897 _mm_setzero_ps (),
11898 (__mmask8) __U,
11899 _MM_FROUND_CUR_DIRECTION);
11900}
11901
756c5857
AI
11902extern __inline __m512d
11903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904_mm512_scalef_pd (__m512d __A, __m512d __B)
11905{
11906 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11907 (__v8df) __B,
11908 (__v8df)
0b192937 11909 _mm512_undefined_pd (),
756c5857
AI
11910 (__mmask8) -1,
11911 _MM_FROUND_CUR_DIRECTION);
11912}
11913
11914extern __inline __m512d
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11917{
11918 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11919 (__v8df) __B,
11920 (__v8df) __W,
11921 (__mmask8) __U,
11922 _MM_FROUND_CUR_DIRECTION);
11923}
11924
11925extern __inline __m512d
11926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11927_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11928{
11929 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11930 (__v8df) __B,
11931 (__v8df)
11932 _mm512_setzero_pd (),
11933 (__mmask8) __U,
11934 _MM_FROUND_CUR_DIRECTION);
11935}
11936
11937extern __inline __m512
11938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11939_mm512_scalef_ps (__m512 __A, __m512 __B)
11940{
11941 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11942 (__v16sf) __B,
11943 (__v16sf)
0b192937 11944 _mm512_undefined_ps (),
756c5857
AI
11945 (__mmask16) -1,
11946 _MM_FROUND_CUR_DIRECTION);
11947}
11948
11949extern __inline __m512
11950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11951_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11952{
11953 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11954 (__v16sf) __B,
11955 (__v16sf) __W,
11956 (__mmask16) __U,
11957 _MM_FROUND_CUR_DIRECTION);
11958}
11959
11960extern __inline __m512
11961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11962_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11963{
11964 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11965 (__v16sf) __B,
11966 (__v16sf)
11967 _mm512_setzero_ps (),
11968 (__mmask16) __U,
11969 _MM_FROUND_CUR_DIRECTION);
11970}
11971
075691af
AI
11972extern __inline __m128d
11973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974_mm_scalef_sd (__m128d __A, __m128d __B)
11975{
11976 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11977 (__v2df) __B,
11978 _MM_FROUND_CUR_DIRECTION);
11979}
11980
11981extern __inline __m128
11982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11983_mm_scalef_ss (__m128 __A, __m128 __B)
11984{
11985 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11986 (__v4sf) __B,
11987 _MM_FROUND_CUR_DIRECTION);
11988}
11989
756c5857
AI
11990extern __inline __m512d
11991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11992_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11993{
11994 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11995 (__v8df) __B,
11996 (__v8df) __C,
11997 (__mmask8) -1,
11998 _MM_FROUND_CUR_DIRECTION);
11999}
12000
12001extern __inline __m512d
12002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12004{
12005 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12006 (__v8df) __B,
12007 (__v8df) __C,
12008 (__mmask8) __U,
12009 _MM_FROUND_CUR_DIRECTION);
12010}
12011
12012extern __inline __m512d
12013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12015{
12016 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12017 (__v8df) __B,
12018 (__v8df) __C,
12019 (__mmask8) __U,
12020 _MM_FROUND_CUR_DIRECTION);
12021}
12022
12023extern __inline __m512d
12024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12026{
12027 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12028 (__v8df) __B,
12029 (__v8df) __C,
12030 (__mmask8) __U,
12031 _MM_FROUND_CUR_DIRECTION);
12032}
12033
12034extern __inline __m512
12035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12037{
12038 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12039 (__v16sf) __B,
12040 (__v16sf) __C,
12041 (__mmask16) -1,
12042 _MM_FROUND_CUR_DIRECTION);
12043}
12044
12045extern __inline __m512
12046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12048{
12049 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12050 (__v16sf) __B,
12051 (__v16sf) __C,
12052 (__mmask16) __U,
12053 _MM_FROUND_CUR_DIRECTION);
12054}
12055
12056extern __inline __m512
12057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12058_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12059{
12060 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12061 (__v16sf) __B,
12062 (__v16sf) __C,
12063 (__mmask16) __U,
12064 _MM_FROUND_CUR_DIRECTION);
12065}
12066
12067extern __inline __m512
12068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12069_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12070{
12071 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12072 (__v16sf) __B,
12073 (__v16sf) __C,
12074 (__mmask16) __U,
12075 _MM_FROUND_CUR_DIRECTION);
12076}
12077
12078extern __inline __m512d
12079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12081{
12082 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12083 (__v8df) __B,
12084 -(__v8df) __C,
12085 (__mmask8) -1,
12086 _MM_FROUND_CUR_DIRECTION);
12087}
12088
12089extern __inline __m512d
12090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12091_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12092{
12093 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12094 (__v8df) __B,
12095 -(__v8df) __C,
12096 (__mmask8) __U,
12097 _MM_FROUND_CUR_DIRECTION);
12098}
12099
12100extern __inline __m512d
12101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12103{
12104 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12105 (__v8df) __B,
12106 (__v8df) __C,
12107 (__mmask8) __U,
12108 _MM_FROUND_CUR_DIRECTION);
12109}
12110
12111extern __inline __m512d
12112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12113_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12114{
12115 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12116 (__v8df) __B,
12117 -(__v8df) __C,
12118 (__mmask8) __U,
12119 _MM_FROUND_CUR_DIRECTION);
12120}
12121
12122extern __inline __m512
12123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12125{
12126 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12127 (__v16sf) __B,
12128 -(__v16sf) __C,
12129 (__mmask16) -1,
12130 _MM_FROUND_CUR_DIRECTION);
12131}
12132
12133extern __inline __m512
12134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12135_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12136{
12137 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12138 (__v16sf) __B,
12139 -(__v16sf) __C,
12140 (__mmask16) __U,
12141 _MM_FROUND_CUR_DIRECTION);
12142}
12143
12144extern __inline __m512
12145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12146_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12147{
12148 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12149 (__v16sf) __B,
12150 (__v16sf) __C,
12151 (__mmask16) __U,
12152 _MM_FROUND_CUR_DIRECTION);
12153}
12154
12155extern __inline __m512
12156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12158{
12159 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12160 (__v16sf) __B,
12161 -(__v16sf) __C,
12162 (__mmask16) __U,
12163 _MM_FROUND_CUR_DIRECTION);
12164}
12165
12166extern __inline __m512d
12167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12168_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12169{
12170 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12171 (__v8df) __B,
12172 (__v8df) __C,
12173 (__mmask8) -1,
12174 _MM_FROUND_CUR_DIRECTION);
12175}
12176
12177extern __inline __m512d
12178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12179_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12180{
12181 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12182 (__v8df) __B,
12183 (__v8df) __C,
12184 (__mmask8) __U,
12185 _MM_FROUND_CUR_DIRECTION);
12186}
12187
12188extern __inline __m512d
12189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12191{
12192 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12193 (__v8df) __B,
12194 (__v8df) __C,
12195 (__mmask8) __U,
12196 _MM_FROUND_CUR_DIRECTION);
12197}
12198
12199extern __inline __m512d
12200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12202{
12203 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12204 (__v8df) __B,
12205 (__v8df) __C,
12206 (__mmask8) __U,
12207 _MM_FROUND_CUR_DIRECTION);
12208}
12209
12210extern __inline __m512
12211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12212_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12213{
12214 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12215 (__v16sf) __B,
12216 (__v16sf) __C,
12217 (__mmask16) -1,
12218 _MM_FROUND_CUR_DIRECTION);
12219}
12220
12221extern __inline __m512
12222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12223_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12224{
12225 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12226 (__v16sf) __B,
12227 (__v16sf) __C,
12228 (__mmask16) __U,
12229 _MM_FROUND_CUR_DIRECTION);
12230}
12231
12232extern __inline __m512
12233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12234_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12235{
12236 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12237 (__v16sf) __B,
12238 (__v16sf) __C,
12239 (__mmask16) __U,
12240 _MM_FROUND_CUR_DIRECTION);
12241}
12242
12243extern __inline __m512
12244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12245_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12246{
12247 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12248 (__v16sf) __B,
12249 (__v16sf) __C,
12250 (__mmask16) __U,
12251 _MM_FROUND_CUR_DIRECTION);
12252}
12253
12254extern __inline __m512d
12255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12256_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12257{
12258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12259 (__v8df) __B,
12260 -(__v8df) __C,
12261 (__mmask8) -1,
12262 _MM_FROUND_CUR_DIRECTION);
12263}
12264
12265extern __inline __m512d
12266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12267_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12268{
12269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12270 (__v8df) __B,
12271 -(__v8df) __C,
12272 (__mmask8) __U,
12273 _MM_FROUND_CUR_DIRECTION);
12274}
12275
12276extern __inline __m512d
12277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12278_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12279{
12280 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12281 (__v8df) __B,
12282 (__v8df) __C,
12283 (__mmask8) __U,
12284 _MM_FROUND_CUR_DIRECTION);
12285}
12286
12287extern __inline __m512d
12288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12289_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12290{
12291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12292 (__v8df) __B,
12293 -(__v8df) __C,
12294 (__mmask8) __U,
12295 _MM_FROUND_CUR_DIRECTION);
12296}
12297
12298extern __inline __m512
12299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12301{
12302 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12303 (__v16sf) __B,
12304 -(__v16sf) __C,
12305 (__mmask16) -1,
12306 _MM_FROUND_CUR_DIRECTION);
12307}
12308
12309extern __inline __m512
12310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12311_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12312{
12313 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12314 (__v16sf) __B,
12315 -(__v16sf) __C,
12316 (__mmask16) __U,
12317 _MM_FROUND_CUR_DIRECTION);
12318}
12319
12320extern __inline __m512
12321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12323{
12324 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12325 (__v16sf) __B,
12326 (__v16sf) __C,
12327 (__mmask16) __U,
12328 _MM_FROUND_CUR_DIRECTION);
12329}
12330
12331extern __inline __m512
12332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12333_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12334{
12335 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12336 (__v16sf) __B,
12337 -(__v16sf) __C,
12338 (__mmask16) __U,
12339 _MM_FROUND_CUR_DIRECTION);
12340}
12341
12342extern __inline __m512d
12343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12344_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12345{
12346 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12347 (__v8df) __B,
12348 (__v8df) __C,
12349 (__mmask8) -1,
12350 _MM_FROUND_CUR_DIRECTION);
12351}
12352
12353extern __inline __m512d
12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12356{
12357 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12358 (__v8df) __B,
12359 (__v8df) __C,
12360 (__mmask8) __U,
12361 _MM_FROUND_CUR_DIRECTION);
12362}
12363
12364extern __inline __m512d
12365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12367{
12368 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12369 (__v8df) __B,
12370 (__v8df) __C,
12371 (__mmask8) __U,
12372 _MM_FROUND_CUR_DIRECTION);
12373}
12374
12375extern __inline __m512d
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12378{
12379 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12380 (__v8df) __B,
12381 (__v8df) __C,
12382 (__mmask8) __U,
12383 _MM_FROUND_CUR_DIRECTION);
12384}
12385
12386extern __inline __m512
12387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12389{
12390 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12391 (__v16sf) __B,
12392 (__v16sf) __C,
12393 (__mmask16) -1,
12394 _MM_FROUND_CUR_DIRECTION);
12395}
12396
12397extern __inline __m512
12398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12399_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12400{
12401 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12402 (__v16sf) __B,
12403 (__v16sf) __C,
12404 (__mmask16) __U,
12405 _MM_FROUND_CUR_DIRECTION);
12406}
12407
12408extern __inline __m512
12409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12410_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12411{
12412 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12413 (__v16sf) __B,
12414 (__v16sf) __C,
12415 (__mmask16) __U,
12416 _MM_FROUND_CUR_DIRECTION);
12417}
12418
12419extern __inline __m512
12420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12421_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12422{
12423 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12424 (__v16sf) __B,
12425 (__v16sf) __C,
12426 (__mmask16) __U,
12427 _MM_FROUND_CUR_DIRECTION);
12428}
12429
12430extern __inline __m512d
12431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12432_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12433{
12434 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12435 (__v8df) __B,
12436 -(__v8df) __C,
12437 (__mmask8) -1,
12438 _MM_FROUND_CUR_DIRECTION);
12439}
12440
12441extern __inline __m512d
12442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12443_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12444{
12445 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12446 (__v8df) __B,
12447 (__v8df) __C,
12448 (__mmask8) __U,
12449 _MM_FROUND_CUR_DIRECTION);
12450}
12451
12452extern __inline __m512d
12453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12454_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12455{
12456 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12457 (__v8df) __B,
12458 (__v8df) __C,
12459 (__mmask8) __U,
12460 _MM_FROUND_CUR_DIRECTION);
12461}
12462
12463extern __inline __m512d
12464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12465_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12466{
12467 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12468 (__v8df) __B,
12469 -(__v8df) __C,
12470 (__mmask8) __U,
12471 _MM_FROUND_CUR_DIRECTION);
12472}
12473
12474extern __inline __m512
12475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12476_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12477{
12478 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12479 (__v16sf) __B,
12480 -(__v16sf) __C,
12481 (__mmask16) -1,
12482 _MM_FROUND_CUR_DIRECTION);
12483}
12484
12485extern __inline __m512
12486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12487_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12488{
12489 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12490 (__v16sf) __B,
12491 (__v16sf) __C,
12492 (__mmask16) __U,
12493 _MM_FROUND_CUR_DIRECTION);
12494}
12495
12496extern __inline __m512
12497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12498_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12499{
12500 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12501 (__v16sf) __B,
12502 (__v16sf) __C,
12503 (__mmask16) __U,
12504 _MM_FROUND_CUR_DIRECTION);
12505}
12506
12507extern __inline __m512
12508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12509_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12510{
12511 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12512 (__v16sf) __B,
12513 -(__v16sf) __C,
12514 (__mmask16) __U,
12515 _MM_FROUND_CUR_DIRECTION);
12516}
12517
12518extern __inline __m256i
12519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12520_mm512_cvttpd_epi32 (__m512d __A)
12521{
12522 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12523 (__v8si)
0b192937 12524 _mm256_undefined_si256 (),
756c5857
AI
12525 (__mmask8) -1,
12526 _MM_FROUND_CUR_DIRECTION);
12527}
12528
12529extern __inline __m256i
12530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12531_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12532{
12533 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12534 (__v8si) __W,
12535 (__mmask8) __U,
12536 _MM_FROUND_CUR_DIRECTION);
12537}
12538
12539extern __inline __m256i
12540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12541_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12542{
12543 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12544 (__v8si)
12545 _mm256_setzero_si256 (),
12546 (__mmask8) __U,
12547 _MM_FROUND_CUR_DIRECTION);
12548}
12549
12550extern __inline __m256i
12551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12552_mm512_cvttpd_epu32 (__m512d __A)
12553{
12554 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12555 (__v8si)
0b192937 12556 _mm256_undefined_si256 (),
756c5857
AI
12557 (__mmask8) -1,
12558 _MM_FROUND_CUR_DIRECTION);
12559}
12560
12561extern __inline __m256i
12562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12563_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12564{
12565 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12566 (__v8si) __W,
12567 (__mmask8) __U,
12568 _MM_FROUND_CUR_DIRECTION);
12569}
12570
12571extern __inline __m256i
12572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12573_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12574{
12575 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12576 (__v8si)
12577 _mm256_setzero_si256 (),
12578 (__mmask8) __U,
12579 _MM_FROUND_CUR_DIRECTION);
12580}
12581
12582extern __inline __m256i
12583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12584_mm512_cvtpd_epi32 (__m512d __A)
12585{
12586 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12587 (__v8si)
0b192937 12588 _mm256_undefined_si256 (),
756c5857
AI
12589 (__mmask8) -1,
12590 _MM_FROUND_CUR_DIRECTION);
12591}
12592
12593extern __inline __m256i
12594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12596{
12597 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12598 (__v8si) __W,
12599 (__mmask8) __U,
12600 _MM_FROUND_CUR_DIRECTION);
12601}
12602
12603extern __inline __m256i
12604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12605_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12606{
12607 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12608 (__v8si)
12609 _mm256_setzero_si256 (),
12610 (__mmask8) __U,
12611 _MM_FROUND_CUR_DIRECTION);
12612}
12613
12614extern __inline __m256i
12615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12616_mm512_cvtpd_epu32 (__m512d __A)
12617{
12618 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12619 (__v8si)
0b192937 12620 _mm256_undefined_si256 (),
756c5857
AI
12621 (__mmask8) -1,
12622 _MM_FROUND_CUR_DIRECTION);
12623}
12624
12625extern __inline __m256i
12626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12627_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12628{
12629 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12630 (__v8si) __W,
12631 (__mmask8) __U,
12632 _MM_FROUND_CUR_DIRECTION);
12633}
12634
12635extern __inline __m256i
12636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12637_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12638{
12639 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12640 (__v8si)
12641 _mm256_setzero_si256 (),
12642 (__mmask8) __U,
12643 _MM_FROUND_CUR_DIRECTION);
12644}
12645
12646extern __inline __m512i
12647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12648_mm512_cvttps_epi32 (__m512 __A)
12649{
12650 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12651 (__v16si)
4271e5cb 12652 _mm512_undefined_epi32 (),
756c5857
AI
12653 (__mmask16) -1,
12654 _MM_FROUND_CUR_DIRECTION);
12655}
12656
12657extern __inline __m512i
12658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12659_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12660{
12661 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12662 (__v16si) __W,
12663 (__mmask16) __U,
12664 _MM_FROUND_CUR_DIRECTION);
12665}
12666
12667extern __inline __m512i
12668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12669_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
12670{
12671 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12672 (__v16si)
12673 _mm512_setzero_si512 (),
12674 (__mmask16) __U,
12675 _MM_FROUND_CUR_DIRECTION);
12676}
12677
12678extern __inline __m512i
12679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12680_mm512_cvttps_epu32 (__m512 __A)
12681{
12682 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12683 (__v16si)
4271e5cb 12684 _mm512_undefined_epi32 (),
756c5857
AI
12685 (__mmask16) -1,
12686 _MM_FROUND_CUR_DIRECTION);
12687}
12688
12689extern __inline __m512i
12690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12691_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12692{
12693 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12694 (__v16si) __W,
12695 (__mmask16) __U,
12696 _MM_FROUND_CUR_DIRECTION);
12697}
12698
12699extern __inline __m512i
12700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12701_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
12702{
12703 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12704 (__v16si)
12705 _mm512_setzero_si512 (),
12706 (__mmask16) __U,
12707 _MM_FROUND_CUR_DIRECTION);
12708}
12709
12710extern __inline __m512i
12711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12712_mm512_cvtps_epi32 (__m512 __A)
12713{
12714 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12715 (__v16si)
4271e5cb 12716 _mm512_undefined_epi32 (),
756c5857
AI
12717 (__mmask16) -1,
12718 _MM_FROUND_CUR_DIRECTION);
12719}
12720
12721extern __inline __m512i
12722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12723_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12724{
12725 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12726 (__v16si) __W,
12727 (__mmask16) __U,
12728 _MM_FROUND_CUR_DIRECTION);
12729}
12730
12731extern __inline __m512i
12732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12733_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12734{
12735 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12736 (__v16si)
12737 _mm512_setzero_si512 (),
12738 (__mmask16) __U,
12739 _MM_FROUND_CUR_DIRECTION);
12740}
12741
12742extern __inline __m512i
12743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12744_mm512_cvtps_epu32 (__m512 __A)
12745{
12746 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12747 (__v16si)
4271e5cb 12748 _mm512_undefined_epi32 (),
756c5857
AI
12749 (__mmask16) -1,
12750 _MM_FROUND_CUR_DIRECTION);
12751}
12752
12753extern __inline __m512i
12754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12755_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12756{
12757 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12758 (__v16si) __W,
12759 (__mmask16) __U,
12760 _MM_FROUND_CUR_DIRECTION);
12761}
12762
12763extern __inline __m512i
12764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12765_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12766{
12767 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12768 (__v16si)
12769 _mm512_setzero_si512 (),
12770 (__mmask16) __U,
12771 _MM_FROUND_CUR_DIRECTION);
12772}
12773
dcb2c527
JJ
12774extern __inline double
12775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12776_mm512_cvtsd_f64 (__m512d __A)
12777{
12778 return __A[0];
12779}
12780
12781extern __inline float
12782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12783_mm512_cvtss_f32 (__m512 __A)
12784{
12785 return __A[0];
12786}
12787
756c5857
AI
12788#ifdef __x86_64__
12789extern __inline __m128
12790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12791_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12792{
12793 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12794 _MM_FROUND_CUR_DIRECTION);
12795}
12796
12797extern __inline __m128d
12798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12799_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12800{
12801 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12802 _MM_FROUND_CUR_DIRECTION);
12803}
12804#endif
12805
12806extern __inline __m128
12807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12808_mm_cvtu32_ss (__m128 __A, unsigned __B)
12809{
12810 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12811 _MM_FROUND_CUR_DIRECTION);
12812}
12813
12814extern __inline __m512
12815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816_mm512_cvtepi32_ps (__m512i __A)
12817{
12818 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12819 (__v16sf)
0b192937 12820 _mm512_undefined_ps (),
756c5857
AI
12821 (__mmask16) -1,
12822 _MM_FROUND_CUR_DIRECTION);
12823}
12824
12825extern __inline __m512
12826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12828{
12829 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12830 (__v16sf) __W,
12831 (__mmask16) __U,
12832 _MM_FROUND_CUR_DIRECTION);
12833}
12834
12835extern __inline __m512
12836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12838{
12839 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12840 (__v16sf)
12841 _mm512_setzero_ps (),
12842 (__mmask16) __U,
12843 _MM_FROUND_CUR_DIRECTION);
12844}
12845
12846extern __inline __m512
12847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12848_mm512_cvtepu32_ps (__m512i __A)
12849{
12850 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12851 (__v16sf)
0b192937 12852 _mm512_undefined_ps (),
756c5857
AI
12853 (__mmask16) -1,
12854 _MM_FROUND_CUR_DIRECTION);
12855}
12856
12857extern __inline __m512
12858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12859_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12860{
12861 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12862 (__v16sf) __W,
12863 (__mmask16) __U,
12864 _MM_FROUND_CUR_DIRECTION);
12865}
12866
12867extern __inline __m512
12868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12869_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12870{
12871 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12872 (__v16sf)
12873 _mm512_setzero_ps (),
12874 (__mmask16) __U,
12875 _MM_FROUND_CUR_DIRECTION);
12876}
12877
12878#ifdef __OPTIMIZE__
12879extern __inline __m512d
12880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12881_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12882{
12883 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12884 (__v8df) __B,
12885 (__v8di) __C,
12886 __imm,
12887 (__mmask8) -1,
12888 _MM_FROUND_CUR_DIRECTION);
12889}
12890
12891extern __inline __m512d
12892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12893_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12894 __m512i __C, const int __imm)
12895{
12896 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12897 (__v8df) __B,
12898 (__v8di) __C,
12899 __imm,
12900 (__mmask8) __U,
12901 _MM_FROUND_CUR_DIRECTION);
12902}
12903
12904extern __inline __m512d
12905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12906_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12907 __m512i __C, const int __imm)
12908{
12909 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12910 (__v8df) __B,
12911 (__v8di) __C,
12912 __imm,
12913 (__mmask8) __U,
12914 _MM_FROUND_CUR_DIRECTION);
12915}
12916
12917extern __inline __m512
12918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12920{
12921 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12922 (__v16sf) __B,
12923 (__v16si) __C,
12924 __imm,
12925 (__mmask16) -1,
12926 _MM_FROUND_CUR_DIRECTION);
12927}
12928
12929extern __inline __m512
12930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12931_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12932 __m512i __C, const int __imm)
12933{
12934 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12935 (__v16sf) __B,
12936 (__v16si) __C,
12937 __imm,
12938 (__mmask16) __U,
12939 _MM_FROUND_CUR_DIRECTION);
12940}
12941
12942extern __inline __m512
12943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12944_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12945 __m512i __C, const int __imm)
12946{
12947 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12948 (__v16sf) __B,
12949 (__v16si) __C,
12950 __imm,
12951 (__mmask16) __U,
12952 _MM_FROUND_CUR_DIRECTION);
12953}
12954
12955extern __inline __m128d
12956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12958{
12959 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12960 (__v2df) __B,
12961 (__v2di) __C, __imm,
12962 (__mmask8) -1,
12963 _MM_FROUND_CUR_DIRECTION);
12964}
12965
12966extern __inline __m128d
12967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12968_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12969 __m128i __C, const int __imm)
12970{
12971 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12972 (__v2df) __B,
12973 (__v2di) __C, __imm,
12974 (__mmask8) __U,
12975 _MM_FROUND_CUR_DIRECTION);
12976}
12977
12978extern __inline __m128d
12979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12980_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12981 __m128i __C, const int __imm)
12982{
12983 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12984 (__v2df) __B,
12985 (__v2di) __C,
12986 __imm,
12987 (__mmask8) __U,
12988 _MM_FROUND_CUR_DIRECTION);
12989}
12990
12991extern __inline __m128
12992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12993_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12994{
12995 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12996 (__v4sf) __B,
12997 (__v4si) __C, __imm,
12998 (__mmask8) -1,
12999 _MM_FROUND_CUR_DIRECTION);
13000}
13001
13002extern __inline __m128
13003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13004_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13005 __m128i __C, const int __imm)
13006{
13007 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13008 (__v4sf) __B,
13009 (__v4si) __C, __imm,
13010 (__mmask8) __U,
13011 _MM_FROUND_CUR_DIRECTION);
13012}
13013
13014extern __inline __m128
13015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13016_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13017 __m128i __C, const int __imm)
13018{
13019 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13020 (__v4sf) __B,
13021 (__v4si) __C, __imm,
13022 (__mmask8) __U,
13023 _MM_FROUND_CUR_DIRECTION);
13024}
13025#else
/* Macro form of _mm512_fixupimm_pd for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmpd512 mask builtin with an
   all-ones mask.  */
#define _mm512_fixupimm_pd(X, Y, Z, C)                                  \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                         \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (__v8di)(__m512i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(-1),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13030
/* Macro form of _mm512_mask_fixupimm_pd for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmpd512 mask builtin with
   write mask U.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)                          \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                         \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (__v8di)(__m512i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13035
/* Macro form of _mm512_maskz_fixupimm_pd for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmpd512 maskz builtin with
   mask U.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)                         \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz (                        \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (__v8di)(__m512i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13040
/* Macro form of _mm512_fixupimm_ps for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmps512 mask builtin with an
   all-ones mask.  */
#define _mm512_fixupimm_ps(X, Y, Z, C)                                  \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                          \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (__v16si)(__m512i)(Z),                                             \
     (int)(C),                                                          \
     (__mmask16)(-1),                                                   \
     _MM_FROUND_CUR_DIRECTION))
13045
/* Macro form of _mm512_mask_fixupimm_ps for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmps512 mask builtin with
   write mask U.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)                          \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                          \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (__v16si)(__m512i)(Z),                                             \
     (int)(C),                                                          \
     (__mmask16)(U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13050
/* Macro form of _mm512_maskz_fixupimm_ps for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmps512 maskz builtin with
   mask U.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)                         \
  ((__m512)__builtin_ia32_fixupimmps512_maskz (                         \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (__v16si)(__m512i)(Z),                                             \
     (int)(C),                                                          \
     (__mmask16)(U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13055
/* Macro form of _mm_fixupimm_sd for the #else side of the __OPTIMIZE__
   check: expands to the fixupimmsd mask builtin with an all-ones mask.  */
#define _mm_fixupimm_sd(X, Y, Z, C)                                     \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                            \
     (__v2df)(__m128d)(X),                                              \
     (__v2df)(__m128d)(Y),                                              \
     (__v2di)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(-1),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13060
/* Macro form of _mm_mask_fixupimm_sd for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmsd mask builtin with write
   mask U.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C)                             \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                            \
     (__v2df)(__m128d)(X),                                              \
     (__v2df)(__m128d)(Y),                                              \
     (__v2di)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13065
/* Macro form of _mm_maskz_fixupimm_sd for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmsd maskz builtin with mask
   U.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C)                            \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz (                           \
     (__v2df)(__m128d)(X),                                              \
     (__v2df)(__m128d)(Y),                                              \
     (__v2di)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13070
/* Macro form of _mm_fixupimm_ss for the #else side of the __OPTIMIZE__
   check: expands to the fixupimmss mask builtin with an all-ones mask.  */
#define _mm_fixupimm_ss(X, Y, Z, C)                                     \
  ((__m128)__builtin_ia32_fixupimmss_mask (                             \
     (__v4sf)(__m128)(X),                                               \
     (__v4sf)(__m128)(Y),                                               \
     (__v4si)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(-1),                                                    \
     _MM_FROUND_CUR_DIRECTION))
13075
/* Macro form of _mm_mask_fixupimm_ss for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmss mask builtin with write
   mask U.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C)                             \
  ((__m128)__builtin_ia32_fixupimmss_mask (                             \
     (__v4sf)(__m128)(X),                                               \
     (__v4sf)(__m128)(Y),                                               \
     (__v4si)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13080
/* Macro form of _mm_maskz_fixupimm_ss for the #else side of the
   __OPTIMIZE__ check: expands to the fixupimmss maskz builtin with mask
   U.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C)                            \
  ((__m128)__builtin_ia32_fixupimmss_maskz (                            \
     (__v4sf)(__m128)(X),                                               \
     (__v4sf)(__m128)(Y),                                               \
     (__v4si)(__m128i)(Z),                                              \
     (int)(C),                                                          \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
13085#endif
13086
13087#ifdef __x86_64__
13088extern __inline unsigned long long
13089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13090_mm_cvtss_u64 (__m128 __A)
13091{
13092 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13093 __A,
13094 _MM_FROUND_CUR_DIRECTION);
13095}
13096
13097extern __inline unsigned long long
13098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13099_mm_cvttss_u64 (__m128 __A)
13100{
13101 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13102 __A,
13103 _MM_FROUND_CUR_DIRECTION);
13104}
13105
13106extern __inline long long
13107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13108_mm_cvttss_i64 (__m128 __A)
13109{
13110 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13111 _MM_FROUND_CUR_DIRECTION);
13112}
13113#endif /* __x86_64__ */
13114
13115extern __inline unsigned
13116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13117_mm_cvtss_u32 (__m128 __A)
13118{
13119 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13120 _MM_FROUND_CUR_DIRECTION);
13121}
13122
13123extern __inline unsigned
13124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125_mm_cvttss_u32 (__m128 __A)
13126{
13127 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13128 _MM_FROUND_CUR_DIRECTION);
13129}
13130
13131extern __inline int
13132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133_mm_cvttss_i32 (__m128 __A)
13134{
13135 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13136 _MM_FROUND_CUR_DIRECTION);
13137}
13138
13139#ifdef __x86_64__
13140extern __inline unsigned long long
13141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13142_mm_cvtsd_u64 (__m128d __A)
13143{
13144 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13145 __A,
13146 _MM_FROUND_CUR_DIRECTION);
13147}
13148
13149extern __inline unsigned long long
13150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13151_mm_cvttsd_u64 (__m128d __A)
13152{
13153 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13154 __A,
13155 _MM_FROUND_CUR_DIRECTION);
13156}
13157
13158extern __inline long long
13159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13160_mm_cvttsd_i64 (__m128d __A)
13161{
13162 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13163 _MM_FROUND_CUR_DIRECTION);
13164}
13165#endif /* __x86_64__ */
13166
13167extern __inline unsigned
13168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13169_mm_cvtsd_u32 (__m128d __A)
13170{
13171 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13172 _MM_FROUND_CUR_DIRECTION);
13173}
13174
13175extern __inline unsigned
13176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13177_mm_cvttsd_u32 (__m128d __A)
13178{
13179 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13180 _MM_FROUND_CUR_DIRECTION);
13181}
13182
13183extern __inline int
13184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185_mm_cvttsd_i32 (__m128d __A)
13186{
13187 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13188 _MM_FROUND_CUR_DIRECTION);
13189}
13190
13191extern __inline __m512d
13192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13193_mm512_cvtps_pd (__m256 __A)
13194{
13195 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13196 (__v8df)
0b192937 13197 _mm512_undefined_pd (),
756c5857
AI
13198 (__mmask8) -1,
13199 _MM_FROUND_CUR_DIRECTION);
13200}
13201
13202extern __inline __m512d
13203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13204_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13205{
13206 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13207 (__v8df) __W,
13208 (__mmask8) __U,
13209 _MM_FROUND_CUR_DIRECTION);
13210}
13211
13212extern __inline __m512d
13213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13214_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13215{
13216 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13217 (__v8df)
13218 _mm512_setzero_pd (),
13219 (__mmask8) __U,
13220 _MM_FROUND_CUR_DIRECTION);
13221}
13222
13223extern __inline __m512
13224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13225_mm512_cvtph_ps (__m256i __A)
13226{
13227 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13228 (__v16sf)
0b192937 13229 _mm512_undefined_ps (),
756c5857
AI
13230 (__mmask16) -1,
13231 _MM_FROUND_CUR_DIRECTION);
13232}
13233
13234extern __inline __m512
13235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13236_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13237{
13238 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13239 (__v16sf) __W,
13240 (__mmask16) __U,
13241 _MM_FROUND_CUR_DIRECTION);
13242}
13243
13244extern __inline __m512
13245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13246_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13247{
13248 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13249 (__v16sf)
13250 _mm512_setzero_ps (),
13251 (__mmask16) __U,
13252 _MM_FROUND_CUR_DIRECTION);
13253}
13254
13255extern __inline __m256
13256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13257_mm512_cvtpd_ps (__m512d __A)
13258{
13259 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13260 (__v8sf)
0b192937 13261 _mm256_undefined_ps (),
756c5857
AI
13262 (__mmask8) -1,
13263 _MM_FROUND_CUR_DIRECTION);
13264}
13265
13266extern __inline __m256
13267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13268_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13269{
13270 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13271 (__v8sf) __W,
13272 (__mmask8) __U,
13273 _MM_FROUND_CUR_DIRECTION);
13274}
13275
13276extern __inline __m256
13277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13279{
13280 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13281 (__v8sf)
13282 _mm256_setzero_ps (),
13283 (__mmask8) __U,
13284 _MM_FROUND_CUR_DIRECTION);
13285}
13286
13287#ifdef __OPTIMIZE__
13288extern __inline __m512
13289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13290_mm512_getexp_ps (__m512 __A)
13291{
13292 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13293 (__v16sf)
0b192937 13294 _mm512_undefined_ps (),
756c5857
AI
13295 (__mmask16) -1,
13296 _MM_FROUND_CUR_DIRECTION);
13297}
13298
13299extern __inline __m512
13300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13301_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13302{
13303 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13304 (__v16sf) __W,
13305 (__mmask16) __U,
13306 _MM_FROUND_CUR_DIRECTION);
13307}
13308
13309extern __inline __m512
13310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13311_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13312{
13313 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13314 (__v16sf)
13315 _mm512_setzero_ps (),
13316 (__mmask16) __U,
13317 _MM_FROUND_CUR_DIRECTION);
13318}
13319
13320extern __inline __m512d
13321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13322_mm512_getexp_pd (__m512d __A)
13323{
13324 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13325 (__v8df)
0b192937 13326 _mm512_undefined_pd (),
756c5857
AI
13327 (__mmask8) -1,
13328 _MM_FROUND_CUR_DIRECTION);
13329}
13330
13331extern __inline __m512d
13332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13333_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13334{
13335 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13336 (__v8df) __W,
13337 (__mmask8) __U,
13338 _MM_FROUND_CUR_DIRECTION);
13339}
13340
13341extern __inline __m512d
13342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13343_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13344{
13345 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13346 (__v8df)
13347 _mm512_setzero_pd (),
13348 (__mmask8) __U,
13349 _MM_FROUND_CUR_DIRECTION);
13350}
13351
075691af
AI
13352extern __inline __m128
13353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13354_mm_getexp_ss (__m128 __A, __m128 __B)
13355{
13356 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13357 (__v4sf) __B,
13358 _MM_FROUND_CUR_DIRECTION);
13359}
13360
13361extern __inline __m128d
13362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13363_mm_getexp_sd (__m128d __A, __m128d __B)
13364{
13365 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13366 (__v2df) __B,
13367 _MM_FROUND_CUR_DIRECTION);
13368}
13369
756c5857
AI
13370extern __inline __m512d
13371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13372_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13373 _MM_MANTISSA_SIGN_ENUM __C)
13374{
13375 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13376 (__C << 2) | __B,
0b192937 13377 _mm512_undefined_pd (),
756c5857
AI
13378 (__mmask8) -1,
13379 _MM_FROUND_CUR_DIRECTION);
13380}
13381
13382extern __inline __m512d
13383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13384_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13385 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13386{
13387 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13388 (__C << 2) | __B,
13389 (__v8df) __W, __U,
13390 _MM_FROUND_CUR_DIRECTION);
13391}
13392
13393extern __inline __m512d
13394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13395_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13396 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13397{
13398 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13399 (__C << 2) | __B,
13400 (__v8df)
13401 _mm512_setzero_pd (),
13402 __U,
13403 _MM_FROUND_CUR_DIRECTION);
13404}
13405
13406extern __inline __m512
13407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13408_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13409 _MM_MANTISSA_SIGN_ENUM __C)
13410{
13411 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13412 (__C << 2) | __B,
0b192937 13413 _mm512_undefined_ps (),
756c5857
AI
13414 (__mmask16) -1,
13415 _MM_FROUND_CUR_DIRECTION);
13416}
13417
13418extern __inline __m512
13419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13420_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13421 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13422{
13423 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13424 (__C << 2) | __B,
13425 (__v16sf) __W, __U,
13426 _MM_FROUND_CUR_DIRECTION);
13427}
13428
13429extern __inline __m512
13430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13431_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13432 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13433{
13434 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13435 (__C << 2) | __B,
13436 (__v16sf)
13437 _mm512_setzero_ps (),
13438 __U,
13439 _MM_FROUND_CUR_DIRECTION);
13440}
13441
075691af
AI
13442extern __inline __m128d
13443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13444_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13445 _MM_MANTISSA_SIGN_ENUM __D)
13446{
13447 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13448 (__v2df) __B,
13449 (__D << 2) | __C,
13450 _MM_FROUND_CUR_DIRECTION);
13451}
13452
13453extern __inline __m128
13454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13455_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13456 _MM_MANTISSA_SIGN_ENUM __D)
13457{
13458 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13459 (__v4sf) __B,
13460 (__D << 2) | __C,
13461 _MM_FROUND_CUR_DIRECTION);
13462}
13463
756c5857
AI
13464#else
/* Macro forms of the 512-bit getmant_pd family (used when __OPTIMIZE__
   is not defined).  Each expands to __builtin_ia32_getmantpd512_mask
   with the control immediate ((C)<<2) | (B); they differ only in the
   third operand and the mask.  */
#define _mm512_getmant_pd(X, B, C)                                      \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),    \
                                               (int)(((C)<<2) | (B)),   \
                                               (__v8df)_mm512_undefined_pd(), \
                                               (__mmask8)-1,            \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C)                           \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),    \
                                               (int)(((C)<<2) | (B)),   \
                                               (__v8df)(__m512d)(W),    \
                                               (__mmask8)(U),           \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C)                             \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),    \
                                               (int)(((C)<<2) | (B)),   \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)(U),           \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit getmant_ps family (used when __OPTIMIZE__
   is not defined).  Each expands to __builtin_ia32_getmantps512_mask
   with the control immediate ((C)<<2) | (B); they differ only in the
   third operand and the mask.  */
#define _mm512_getmant_ps(X, B, C)                                      \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),     \
                                              (int)(((C)<<2) | (B)),    \
                                              (__v16sf)_mm512_undefined_ps(), \
                                              (__mmask16)-1,            \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C)                           \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),     \
                                              (int)(((C)<<2) | (B)),    \
                                              (__v16sf)(__m512)(W),     \
                                              (__mmask16)(U),           \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C)                             \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),     \
                                              (int)(((C)<<2) | (B)),    \
                                              (__v16sf)_mm512_setzero_ps(), \
                                              (__mmask16)(U),           \
                                              _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar getmant intrinsics (used when __OPTIMIZE__
   is not defined): X and Y are the two vector operands, and the
   control immediate is ((D)<<2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D)                                      \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),      \
                                             (__v2df)(__m128d)(Y),      \
                                             (int)(((D)<<2) | (C)),     \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D)                                      \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),        \
                                            (__v4sf)(__m128)(Y),        \
                                            (int)(((D)<<2) | (C)),      \
                                            _MM_FROUND_CUR_DIRECTION))
13516
/* Macro form of _mm_getexp_ss (used when __OPTIMIZE__ is not defined).
   It must expand to the same builtin as the inline definition,
   __builtin_ia32_getexpss128_round, which takes exactly the three
   operands supplied here: A, B and the rounding argument.  */
#define _mm_getexp_ss(A, B)                                             \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),      \
                                              (__v4sf)(__m128)(B),      \
                                              _MM_FROUND_CUR_DIRECTION))
13520
/* Macro form of _mm_getexp_sd (used when __OPTIMIZE__ is not defined).
   It must expand to the same builtin as the inline definition,
   __builtin_ia32_getexpsd128_round, which takes exactly the three
   operands supplied here: A, B and the rounding argument.  */
#define _mm_getexp_sd(A, B)                                             \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),    \
                                               (__v2df)(__m128d)(B),    \
                                               _MM_FROUND_CUR_DIRECTION))
13524
756c5857
AI
/* Macro forms of the 512-bit getexp_ps family (used when __OPTIMIZE__
   is not defined).  Each expands to __builtin_ia32_getexpps512_mask;
   they differ only in the second operand and the mask.  */
#define _mm512_getexp_ps(A)                                             \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)_mm512_undefined_ps(), \
                                             (__mmask16)-1,             \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A)                                  \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)(__m512)(W),      \
                                             (__mmask16)(U),            \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A)                                    \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(U),            \
                                             _MM_FROUND_CUR_DIRECTION))
13536
/* Macro forms of the 512-bit getexp_pd family (used when __OPTIMIZE__
   is not defined).  Each expands to __builtin_ia32_getexppd512_mask;
   they differ only in the second operand and the mask.  */
#define _mm512_getexp_pd(A)                                             \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)_mm512_undefined_pd(), \
                                              (__mmask8)-1,             \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A)                                  \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)(__m512d)(W),     \
                                              (__mmask8)(U),            \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A)                                    \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)_mm512_setzero_pd(), \
                                              (__mmask8)(U),            \
                                              _MM_FROUND_CUR_DIRECTION))
13548#endif
13549
13550#ifdef __OPTIMIZE__
13551extern __inline __m512
13552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13553_mm512_roundscale_ps (__m512 __A, const int __imm)
13554{
13555 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
13556 (__v16sf)
13557 _mm512_undefined_ps (),
13558 -1,
756c5857
AI
13559 _MM_FROUND_CUR_DIRECTION);
13560}
13561
13562extern __inline __m512
13563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13564_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
13565 const int __imm)
13566{
13567 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
13568 (__v16sf) __A,
13569 (__mmask16) __B,
13570 _MM_FROUND_CUR_DIRECTION);
13571}
13572
13573extern __inline __m512
13574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13575_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
13576{
13577 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
13578 __imm,
13579 (__v16sf)
13580 _mm512_setzero_ps (),
13581 (__mmask16) __A,
13582 _MM_FROUND_CUR_DIRECTION);
13583}
13584
13585extern __inline __m512d
13586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13587_mm512_roundscale_pd (__m512d __A, const int __imm)
13588{
13589 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
13590 (__v8df)
13591 _mm512_undefined_pd (),
13592 -1,
756c5857
AI
13593 _MM_FROUND_CUR_DIRECTION);
13594}
13595
13596extern __inline __m512d
13597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13598_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
13599 const int __imm)
13600{
13601 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
13602 (__v8df) __A,
13603 (__mmask8) __B,
13604 _MM_FROUND_CUR_DIRECTION);
13605}
13606
13607extern __inline __m512d
13608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13609_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
13610{
13611 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
13612 __imm,
13613 (__v8df)
13614 _mm512_setzero_pd (),
13615 (__mmask8) __A,
13616 _MM_FROUND_CUR_DIRECTION);
13617}
13618
075691af
AI
13619extern __inline __m128
13620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13621_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
13622{
13623 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
13624 (__v4sf) __B, __imm,
13625 _MM_FROUND_CUR_DIRECTION);
13626}
13627
13628extern __inline __m128d
13629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13630_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
13631{
13632 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
13633 (__v2df) __B, __imm,
13634 _MM_FROUND_CUR_DIRECTION);
13635}
13636
756c5857
AI
13637#else
/* Macro forms of the 512-bit roundscale_ps family (used when
   __OPTIMIZE__ is not defined).  Each expands to
   __builtin_ia32_rndscaleps_mask; argument order follows the inline
   definitions (mask variant: A = third operand, B = mask, C = source,
   D = immediate; maskz variant: A = mask, B = source, C = immediate).  */
#define _mm512_roundscale_ps(A, B)                                      \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),       \
                                            (int)(B),                   \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)(-1),            \
                                            _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D)                           \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),       \
                                            (int)(D),                   \
                                            (__v16sf)(__m512)(A),       \
                                            (__mmask16)(B),             \
                                            _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),       \
                                            (int)(C),                   \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(A),             \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit roundscale_pd family (used when
   __OPTIMIZE__ is not defined).  Each expands to
   __builtin_ia32_rndscalepd_mask; argument order follows the inline
   definitions (mask variant: A = third operand, B = mask, C = source,
   D = immediate; maskz variant: A = mask, B = source, C = immediate).  */
#define _mm512_roundscale_pd(A, B)                                      \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),      \
                                             (int)(B),                  \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)(-1),            \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D)                           \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),      \
                                             (int)(D),                  \
                                             (__v8df)(__m512d)(A),      \
                                             (__mmask8)(B),             \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),      \
                                             (int)(C),                  \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(A),             \
                                             _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar roundscale intrinsics (used when
   __OPTIMIZE__ is not defined): A and B are the vector operands and C
   is the immediate.  */
#define _mm_roundscale_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),       \
                                             (__v4sf)(__m128)(B),       \
                                             (int)(C),                  \
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),     \
                                              (__v2df)(__m128d)(B),     \
                                              (int)(C),                 \
                                              _MM_FROUND_CUR_DIRECTION))
756c5857
AI
13670#endif
13671
13672#ifdef __OPTIMIZE__
13673extern __inline __mmask8
13674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13675_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
13676{
13677 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
13678 (__v8df) __Y, __P,
13679 (__mmask8) -1,
13680 _MM_FROUND_CUR_DIRECTION);
13681}
13682
13683extern __inline __mmask16
13684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13685_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
13686{
13687 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13688 (__v16sf) __Y, __P,
13689 (__mmask16) -1,
13690 _MM_FROUND_CUR_DIRECTION);
13691}
13692
13693extern __inline __mmask16
13694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13695_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
13696{
13697 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13698 (__v16sf) __Y, __P,
13699 (__mmask16) __U,
13700 _MM_FROUND_CUR_DIRECTION);
13701}
13702
13703extern __inline __mmask8
13704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13705_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
13706{
13707 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
13708 (__v8df) __Y, __P,
13709 (__mmask8) __U,
13710 _MM_FROUND_CUR_DIRECTION);
13711}
13712
13713extern __inline __mmask8
13714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13715_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
13716{
13717 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13718 (__v2df) __Y, __P,
13719 (__mmask8) -1,
13720 _MM_FROUND_CUR_DIRECTION);
13721}
13722
13723extern __inline __mmask8
13724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13725_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
13726{
13727 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
13728 (__v2df) __Y, __P,
13729 (__mmask8) __M,
13730 _MM_FROUND_CUR_DIRECTION);
13731}
13732
13733extern __inline __mmask8
13734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13735_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
13736{
13737 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13738 (__v4sf) __Y, __P,
13739 (__mmask8) -1,
13740 _MM_FROUND_CUR_DIRECTION);
13741}
13742
13743extern __inline __mmask8
13744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13745_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
13746{
13747 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13748 (__v4sf) __Y, __P,
13749 (__mmask8) __M,
13750 _MM_FROUND_CUR_DIRECTION);
13751}
13752
13753#else
/* Macro forms of the unmasked 512-bit compares (used when __OPTIMIZE__
   is not defined): X and Y are the operands, P the predicate
   immediate; the mask operand is all-ones.  */
#define _mm512_cmp_pd_mask(X, Y, P)                                     \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y),       \
                                            (int)(P),                   \
                                            (__mmask8)-1,               \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)                                     \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y),      \
                                             (int)(P),                  \
                                             (__mmask16)-1,             \
                                             _MM_FROUND_CUR_DIRECTION))
13763
/* Macro form of _mm512_mask_cmp_pd_mask (used when __OPTIMIZE__ is not
   defined).  M is parenthesized before the cast so that an argument
   such as `m >> 8' is converted to __mmask8 as a whole instead of the
   cast binding to its first operand only.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                             \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y),       \
                                            (int)(P),                   \
                                            (__mmask8)(M),              \
                                            _MM_FROUND_CUR_DIRECTION))
13768
/* Macro form of _mm512_mask_cmp_ps_mask (used when __OPTIMIZE__ is not
   defined).  M is parenthesized before the cast so that a compound
   argument expression is converted to __mmask16 as a whole instead of
   the cast binding to its first operand only.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                             \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y),      \
                                             (int)(P),                  \
                                             (__mmask16)(M),            \
                                             _MM_FROUND_CUR_DIRECTION))
13773
/* Macro form of _mm_cmp_sd_mask (used when __OPTIMIZE__ is not
   defined): X and Y are the operands, P the predicate immediate; the
   mask operand is all-ones.  */
#define _mm_cmp_sd_mask(X, Y, P)                                        \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
                                         (__v2df)(__m128d)(Y),          \
                                         (int)(P),                      \
                                         (__mmask8)-1,                  \
                                         _MM_FROUND_CUR_DIRECTION))
13778
/* Macro form of _mm_mask_cmp_sd_mask (used when __OPTIMIZE__ is not
   defined).  M is parenthesized and cast to __mmask8, matching the
   inline definition, which passes (__mmask8) __M.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
                                         (__v2df)(__m128d)(Y),          \
                                         (int)(P),                      \
                                         (__mmask8)(M),                 \
                                         _MM_FROUND_CUR_DIRECTION))
13783
/* Macro form of _mm_cmp_ss_mask (used when __OPTIMIZE__ is not
   defined): X and Y are the operands, P the predicate immediate; the
   mask operand is all-ones.  */
#define _mm_cmp_ss_mask(X, Y, P)                                        \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
                                         (__v4sf)(__m128)(Y),           \
                                         (int)(P),                      \
                                         (__mmask8)-1,                  \
                                         _MM_FROUND_CUR_DIRECTION))
13788
/* Macro form of _mm_mask_cmp_ss_mask (used when __OPTIMIZE__ is not
   defined).  M is parenthesized and cast to __mmask8, matching the
   inline definition, which passes (__mmask8) __M.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
                                         (__v4sf)(__m128)(Y),           \
                                         (int)(P),                      \
                                         (__mmask8)(M),                 \
                                         _MM_FROUND_CUR_DIRECTION))
13793#endif
13794
2196a885
KY
13795extern __inline __mmask16
13796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13797_mm512_kmov (__mmask16 __A)
13798{
7cdb6e4c 13799 return __builtin_ia32_kmovw (__A);
2196a885
KY
13800}
13801
275be1da
IT
13802extern __inline __m512
13803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13804_mm512_castpd_ps (__m512d __A)
13805{
13806 return (__m512) (__A);
13807}
13808
13809extern __inline __m512i
13810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13811_mm512_castpd_si512 (__m512d __A)
13812{
13813 return (__m512i) (__A);
13814}
13815
13816extern __inline __m512d
13817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13818_mm512_castps_pd (__m512 __A)
13819{
13820 return (__m512d) (__A);
13821}
13822
13823extern __inline __m512i
13824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13825_mm512_castps_si512 (__m512 __A)
13826{
13827 return (__m512i) (__A);
13828}
13829
13830extern __inline __m512
13831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13832_mm512_castsi512_ps (__m512i __A)
13833{
13834 return (__m512) (__A);
13835}
13836
13837extern __inline __m512d
13838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13839_mm512_castsi512_pd (__m512i __A)
13840{
13841 return (__m512d) (__A);
13842}
13843
13844extern __inline __m128d
13845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13846_mm512_castpd512_pd128 (__m512d __A)
13847{
13848 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13849}
13850
13851extern __inline __m128
13852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13853_mm512_castps512_ps128 (__m512 __A)
13854{
13855 return _mm512_extractf32x4_ps(__A, 0);
13856}
13857
13858extern __inline __m128i
13859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13860_mm512_castsi512_si128 (__m512i __A)
13861{
13862 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13863}
13864
13865extern __inline __m256d
13866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13867_mm512_castpd512_pd256 (__m512d __A)
13868{
13869 return _mm512_extractf64x4_pd(__A, 0);
13870}
13871
13872extern __inline __m256
13873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13874_mm512_castps512_ps256 (__m512 __A)
13875{
13876 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13877}
13878
13879extern __inline __m256i
13880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13881_mm512_castsi512_si256 (__m512i __A)
13882{
13883 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13884}
13885
13886extern __inline __m512d
13887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13888_mm512_castpd128_pd512 (__m128d __A)
13889{
13890 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13891}
13892
13893extern __inline __m512
13894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13895_mm512_castps128_ps512 (__m128 __A)
13896{
13897 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13898}
13899
13900extern __inline __m512i
13901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13902_mm512_castsi128_si512 (__m128i __A)
13903{
13904 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13905}
13906
13907extern __inline __m512d
13908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13909_mm512_castpd256_pd512 (__m256d __A)
13910{
13911 return __builtin_ia32_pd512_256pd (__A);
13912}
13913
13914extern __inline __m512
13915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13916_mm512_castps256_ps512 (__m256 __A)
13917{
13918 return __builtin_ia32_ps512_256ps (__A);
13919}
13920
13921extern __inline __m512i
13922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13923_mm512_castsi256_si512 (__m256i __A)
13924{
13925 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13926}
13927
13928extern __inline __mmask16
13929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13930_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13931{
13932 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13933 (__v16si) __B, 0,
13934 (__mmask16) -1);
13935}
13936
13937extern __inline __mmask16
13938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13939_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13940{
13941 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13942 (__v16si) __B, 0, __U);
13943}
13944
13945extern __inline __mmask8
13946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13947_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13948{
13949 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13950 (__v8di) __B, 0, __U);
13951}
13952
13953extern __inline __mmask8
13954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13955_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13956{
13957 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13958 (__v8di) __B, 0,
13959 (__mmask8) -1);
13960}
13961
13962extern __inline __mmask16
13963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13964_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13965{
13966 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13967 (__v16si) __B, 6,
13968 (__mmask16) -1);
13969}
13970
13971extern __inline __mmask16
13972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13973_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13974{
13975 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13976 (__v16si) __B, 6, __U);
13977}
13978
13979extern __inline __mmask8
13980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13981_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13982{
13983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13984 (__v8di) __B, 6, __U);
13985}
13986
13987extern __inline __mmask8
13988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13989_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13990{
13991 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13992 (__v8di) __B, 6,
13993 (__mmask8) -1);
13994}
13995
167a5b77
JJ
/* Function-body helper: expects a __m512i named __A in scope and ends
   in a `return'.  Combines the two 256-bit extracts of __A with `op',
   then the two 128-bit extracts of that result, then the result with
   its { 2, 3, 0, 1 } shuffle, and finally returns element 0 `op'
   element 1.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __hi256 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1);         \
  __v8si __lo256 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0);         \
  __m256i __r256 = (__m256i) (__hi256 op __lo256);                      \
  __v4si __hi128 = (__v4si) _mm256_extracti128_si256 (__r256, 1);       \
  __v4si __lo128 = (__v4si) _mm256_extracti128_si256 (__r256, 0);       \
  __v4si __r128 = __hi128 op __lo128;                                   \
  __v4si __swapped = __builtin_shuffle (__r128, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __r64 = __r128 op __swapped;                                   \
  return __r64[0] op __r64[1]
14007
14008extern __inline int
14009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14010_mm512_reduce_add_epi32 (__m512i __A)
14011{
14012 __MM512_REDUCE_OP (+);
14013}
14014
14015extern __inline int
14016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14017_mm512_reduce_mul_epi32 (__m512i __A)
14018{
14019 __MM512_REDUCE_OP (*);
14020}
14021
14022extern __inline int
14023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14024_mm512_reduce_and_epi32 (__m512i __A)
14025{
14026 __MM512_REDUCE_OP (&);
14027}
14028
14029extern __inline int
14030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14031_mm512_reduce_or_epi32 (__m512i __A)
14032{
14033 __MM512_REDUCE_OP (|);
14034}
14035
14036extern __inline int
14037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14039{
14040 __A = _mm512_maskz_mov_epi32 (__U, __A);
14041 __MM512_REDUCE_OP (+);
14042}
14043
14044extern __inline int
14045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14046_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14047{
14048 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14049 __MM512_REDUCE_OP (*);
14050}
14051
14052extern __inline int
14053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14054_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14055{
14056 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14057 __MM512_REDUCE_OP (&);
14058}
14059
/* Masked bitwise OR of the packed 32-bit integers in __A.  The
   zero-masking move clears every lane whose bit in __U is clear; 0 is
   the identity for `|'.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14060extern __inline int
14061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14062_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14063{
14064 __A = _mm512_maskz_mov_epi32 (__U, __A);
14065 __MM512_REDUCE_OP (|);
14066}
14067
/* Redefine the reduction helper for 32-bit integer min/max.  `op' is an
   intrinsic name suffix (e.g. min_epi32) that is token-pasted onto
   _mm256_ and _mm_.  The expansion is a sequence of declarations ending
   in a return statement, so it must form the tail of a function body
   whose vector argument is named __A.  Steps:
     1. split __A into its upper and lower 256-bit halves and combine;
     2. split that result into 128-bit halves and combine;
     3. swap the two 64-bit halves (shuffle { 2, 3, 0, 1 }) and combine;
     4. bring element 1 into lane 0 (shuffle { 1, 0, 1, 0 }) and combine;
     5. return lane 0, read through a __v4si (signed int) view.  */
14068#undef __MM512_REDUCE_OP
14069#define __MM512_REDUCE_OP(op) \
14070 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
14071 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
14072 __m256i __T3 = _mm256_##op (__T1, __T2); \
14073 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
14074 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
14075 __m128i __T6 = _mm_##op (__T4, __T5); \
14076 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
14077 (__v4si) { 2, 3, 0, 1 }); \
14078 __m128i __T8 = _mm_##op (__T6, __T7); \
14079 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
14080 (__v4si) { 1, 0, 1, 0 }); \
14081 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
14082 return __T10[0]
14083
/* Return the smallest of the sixteen packed signed 32-bit integers in
   __A.  The body and its return statement come from __MM512_REDUCE_OP
   expanded with the min_epi32 intrinsic family.  */
14084extern __inline int
14085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14086_mm512_reduce_min_epi32 (__m512i __A)
14087{
14088 __MM512_REDUCE_OP (min_epi32);
14089}
14090
/* Return the largest of the sixteen packed signed 32-bit integers in
   __A.  The body and its return statement come from __MM512_REDUCE_OP
   expanded with the max_epi32 intrinsic family.  */
14091extern __inline int
14092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14093_mm512_reduce_max_epi32 (__m512i __A)
14094{
14095 __MM512_REDUCE_OP (max_epi32);
14096}
14097
/* Return the smallest of the sixteen packed unsigned 32-bit integers in
   __A.  __MM512_REDUCE_OP reads the final lane through a signed __v4si
   view; the value is converted to unsigned int by the return.  */
14098extern __inline unsigned int
14099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14100_mm512_reduce_min_epu32 (__m512i __A)
14101{
14102 __MM512_REDUCE_OP (min_epu32);
14103}
14104
/* Return the largest of the sixteen packed unsigned 32-bit integers in
   __A.  __MM512_REDUCE_OP reads the final lane through a signed __v4si
   view; the value is converted to unsigned int by the return.  */
14105extern __inline unsigned int
14106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14107_mm512_reduce_max_epu32 (__m512i __A)
14108{
14109 __MM512_REDUCE_OP (max_epu32);
14110}
14111
/* Masked signed minimum of the packed 32-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by __INT_MAX__ (the
   pass-through operand of the merge-masking move), so they can never
   be smaller than a selected lane.  The reduction and return statement
   come from __MM512_REDUCE_OP.  */
14112extern __inline int
14113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14114_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14115{
14116 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14117 __MM512_REDUCE_OP (min_epi32);
14118}
14119
/* Masked signed maximum of the packed 32-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by -__INT_MAX__ - 1 (the most
   negative int), so they can never exceed a selected lane.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14120extern __inline int
14121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14122_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14123{
14124 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14125 __MM512_REDUCE_OP (max_epi32);
14126}
14127
/* Masked unsigned minimum of the packed 32-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by ~0, i.e. all bits set,
   which is the largest value when a lane is compared as unsigned.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14128extern __inline unsigned int
14129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14130_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14131{
14132 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14133 __MM512_REDUCE_OP (min_epu32);
14134}
14135
/* Masked unsigned maximum of the packed 32-bit integers in __A.  The
   zero-masking move clears lanes whose bit in __U is clear; 0 is the
   smallest unsigned value, so those lanes cannot win the maximum.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14136extern __inline unsigned int
14137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14138_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14139{
14140 __A = _mm512_maskz_mov_epi32 (__U, __A);
14141 __MM512_REDUCE_OP (max_epu32);
14142}
14143
/* Redefine the reduction helper for single-precision `+' and `*'.
   `op' is a C binary operator applied element-wise to vector operands.
   The expansion is a sequence of declarations ending in a return
   statement, so it must form the tail of a function body whose __m512
   argument is named __A.  __A is cast to __m512d only so that
   _mm512_extractf64x4_pd can be used to pull out each 256-bit half;
   the halves are cast straight back to __m256.  The halves are
   combined, then the 128-bit halves of that result, then the two
   64-bit halves of the 128-bit value (shuffle { 2, 3, 0, 1 }), and
   finally elements 0 and 1 as scalars.  The lanes are therefore
   combined pairwise as a tree, not sequentially in lane order.  */
14144#undef __MM512_REDUCE_OP
14145#define __MM512_REDUCE_OP(op) \
14146 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14147 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14148 __m256 __T3 = __T1 op __T2; \
14149 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14150 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14151 __m128 __T6 = __T4 op __T5; \
14152 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14153 __m128 __T8 = __T6 op __T7; \
14154 return __T8[0] op __T8[1]
14155
/* Return the sum of the sixteen packed floats in __A.  The body and
   its return statement come from __MM512_REDUCE_OP expanded with `+';
   the additions are performed pairwise as a tree.  */
14156extern __inline float
14157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14158_mm512_reduce_add_ps (__m512 __A)
14159{
14160 __MM512_REDUCE_OP (+);
14161}
14162
/* Return the product of the sixteen packed floats in __A.  The body
   and its return statement come from __MM512_REDUCE_OP expanded with
   `*'; the multiplications are performed pairwise as a tree.  */
14163extern __inline float
14164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14165_mm512_reduce_mul_ps (__m512 __A)
14166{
14167 __MM512_REDUCE_OP (*);
14168}
14169
/* Masked sum of the packed floats in __A.  The zero-masking move
   clears lanes whose bit in __U is clear, so they contribute 0 to the
   sum.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14170extern __inline float
14171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14172_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14173{
14174 __A = _mm512_maskz_mov_ps (__U, __A);
14175 __MM512_REDUCE_OP (+);
14176}
14177
/* Masked product of the packed floats in __A.  The merge-masking move
   uses a pass-through vector of 1.0f, so lanes whose bit in __U is
   clear become the multiplicative identity.  The reduction and return
   statement come from __MM512_REDUCE_OP.  */
14178extern __inline float
14179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14180_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14181{
14182 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14183 __MM512_REDUCE_OP (*);
14184}
14185
/* Redefine the reduction helper for single-precision min/max.  `op' is
   an intrinsic name suffix (e.g. min_ps) that is token-pasted onto
   _mm256_ and _mm_.  The expansion is a sequence of declarations
   ending in a return statement, so it must form the tail of a function
   body whose __m512 argument is named __A.  Steps: combine the two
   256-bit halves of __A; combine the two 128-bit halves of that
   result; swap the 64-bit halves (shuffle { 2, 3, 0, 1 }) and combine;
   bring element 1 into lane 0 (shuffle { 1, 0, 1, 0 }) and combine;
   return lane 0.  */
14186#undef __MM512_REDUCE_OP
14187#define __MM512_REDUCE_OP(op) \
14188 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14189 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14190 __m256 __T3 = _mm256_##op (__T1, __T2); \
14191 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14192 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14193 __m128 __T6 = _mm_##op (__T4, __T5); \
14194 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14195 __m128 __T8 = _mm_##op (__T6, __T7); \
14196 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
14197 __m128 __T10 = _mm_##op (__T8, __T9); \
14198 return __T10[0]
14199
/* Return the minimum of the sixteen packed floats in __A, computed
   with the min_ps intrinsic family.  The body and its return statement
   come from __MM512_REDUCE_OP.  */
14200extern __inline float
14201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14202_mm512_reduce_min_ps (__m512 __A)
14203{
14204 __MM512_REDUCE_OP (min_ps);
14205}
14206
/* Return the maximum of the sixteen packed floats in __A, computed
   with the max_ps intrinsic family.  The body and its return statement
   come from __MM512_REDUCE_OP.  */
14207extern __inline float
14208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14209_mm512_reduce_max_ps (__m512 __A)
14210{
14211 __MM512_REDUCE_OP (max_ps);
14212}
14213
/* Masked minimum of the packed floats in __A.  Lanes whose bit in __U
   is clear are replaced by +infinity (__builtin_inff ()) through the
   merge-masking move, so they cannot be selected over a finite active
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14214extern __inline float
14215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14216_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
14217{
14218 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
14219 __MM512_REDUCE_OP (min_ps);
14220}
14221
/* Masked maximum of the packed floats in __A.  Lanes whose bit in __U
   is clear are replaced by -infinity (-__builtin_inff ()) through the
   merge-masking move, so they cannot be selected over a finite active
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14222extern __inline float
14223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14224_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
14225{
14226 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
14227 __MM512_REDUCE_OP (max_ps);
14228}
14229
/* Redefine the reduction helper for 64-bit integer `+', `*', `&' and
   `|'.  `op' is a C binary operator applied element-wise to vector
   operands.  The expansion is a sequence of declarations ending in a
   return statement, so it must form the tail of a function body whose
   __m512i argument is named __A.  The two 256-bit halves of __A are
   combined as __v4di vectors, then the two 128-bit halves of that
   result as __v2di vectors, and finally the remaining two elements as
   scalars.  */
14230#undef __MM512_REDUCE_OP
14231#define __MM512_REDUCE_OP(op) \
14232 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
14233 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
14234 __m256i __T3 = (__m256i) (__T1 op __T2); \
14235 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
14236 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
14237 __v2di __T6 = __T4 op __T5; \
14238 return __T6[0] op __T6[1]
14239
/* Return the sum of the eight packed 64-bit integers in __A.  The body
   and its return statement come from __MM512_REDUCE_OP expanded with
   `+'.  */
14240extern __inline long long
14241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14242_mm512_reduce_add_epi64 (__m512i __A)
14243{
14244 __MM512_REDUCE_OP (+);
14245}
14246
/* Return the product of the eight packed 64-bit integers in __A.  The
   body and its return statement come from __MM512_REDUCE_OP expanded
   with `*'.  */
14247extern __inline long long
14248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14249_mm512_reduce_mul_epi64 (__m512i __A)
14250{
14251 __MM512_REDUCE_OP (*);
14252}
14253
/* Return the bitwise AND of the eight packed 64-bit integers in __A.
   The body and its return statement come from __MM512_REDUCE_OP
   expanded with `&'.  */
14254extern __inline long long
14255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14256_mm512_reduce_and_epi64 (__m512i __A)
14257{
14258 __MM512_REDUCE_OP (&);
14259}
14260
/* Return the bitwise OR of the eight packed 64-bit integers in __A.
   The body and its return statement come from __MM512_REDUCE_OP
   expanded with `|'.  */
14261extern __inline long long
14262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14263_mm512_reduce_or_epi64 (__m512i __A)
14264{
14265 __MM512_REDUCE_OP (|);
14266}
14267
/* Masked sum of the packed 64-bit integers in __A.  The zero-masking
   move clears lanes whose bit in __U is clear, so they contribute 0 to
   the sum.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14268extern __inline long long
14269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14270_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
14271{
14272 __A = _mm512_maskz_mov_epi64 (__U, __A);
14273 __MM512_REDUCE_OP (+);
14274}
14275
/* Masked product of the packed 64-bit integers in __A.  The merge-
   masking move uses a pass-through vector of 1LL, so lanes whose bit
   in __U is clear become the multiplicative identity.  The reduction
   and return statement come from __MM512_REDUCE_OP.  */
14276extern __inline long long
14277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14278_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
14279{
14280 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
14281 __MM512_REDUCE_OP (*);
14282}
14283
/* Masked bitwise AND of the packed 64-bit integers in __A.  The merge-
   masking move uses a pass-through vector of ~0LL (all bits set), the
   identity for `&', for lanes whose bit in __U is clear.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14284extern __inline long long
14285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14286_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
14287{
14288 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14289 __MM512_REDUCE_OP (&);
14290}
14291
/* Masked bitwise OR of the packed 64-bit integers in __A.  The
   zero-masking move clears lanes whose bit in __U is clear; 0 is the
   identity for `|'.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14292extern __inline long long
14293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14294_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
14295{
14296 __A = _mm512_maskz_mov_epi64 (__U, __A);
14297 __MM512_REDUCE_OP (|);
14298}
14299
/* Redefine the reduction helper for 64-bit integer min/max.  `op' is an
   intrinsic name suffix (e.g. min_epi64) that is token-pasted onto
   _mm512_; unlike the narrower helpers, every step here operates on
   full 512-bit vectors.  The expansion is a sequence of declarations
   ending in a return statement, so it must form the tail of a function
   body whose __m512i argument is named __A.  Steps:
     1. _mm512_shuffle_i64x2 with immediate 0x4e (128-bit lane order
	2, 3, 0, 1 per the Intel encoding) swaps the 256-bit halves of
	__A; combine with __A;
     2. shuffle { 2, 3, 0, 1, 6, 7, 4, 5 } swaps neighbouring 128-bit
	pairs; combine;
     3. shuffle { 1, 0, 3, 2, 5, 4, 7, 6 } swaps neighbouring 64-bit
	elements; combine;
     4. return element 0, read through a __v8di (signed) view.  */
14300#undef __MM512_REDUCE_OP
14301#define __MM512_REDUCE_OP(op) \
14302 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
14303 __m512i __T2 = _mm512_##op (__A, __T1); \
14304 __m512i __T3 \
14305 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
14306 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
14307 __m512i __T4 = _mm512_##op (__T2, __T3); \
14308 __m512i __T5 \
14309 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
14310 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
14311 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
14312 return __T6[0]
14313
/* Return the smallest of the eight packed signed 64-bit integers in
   __A.  The body and its return statement come from __MM512_REDUCE_OP
   expanded with min_epi64.  */
14314extern __inline long long
14315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14316_mm512_reduce_min_epi64 (__m512i __A)
14317{
14318 __MM512_REDUCE_OP (min_epi64);
14319}
14320
/* Return the largest of the eight packed signed 64-bit integers in
   __A.  The body and its return statement come from __MM512_REDUCE_OP
   expanded with max_epi64.  */
14321extern __inline long long
14322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14323_mm512_reduce_max_epi64 (__m512i __A)
14324{
14325 __MM512_REDUCE_OP (max_epi64);
14326}
14327
/* Masked signed minimum of the packed 64-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by __LONG_LONG_MAX__ through
   the merge-masking move, so they can never be smaller than a selected
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14328extern __inline long long
14329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14330_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
14331{
14332 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
14333 __U, __A);
14334 __MM512_REDUCE_OP (min_epi64);
14335}
14336
/* Masked signed maximum of the packed 64-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by -__LONG_LONG_MAX__ - 1
   (the most negative long long), so they can never exceed a selected
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14337extern __inline long long
14338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14339_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
14340{
14341 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
14342 __U, __A);
14343 __MM512_REDUCE_OP (max_epi64);
14344}
14345
/* Return the smallest of the eight packed unsigned 64-bit integers in
   __A.  __MM512_REDUCE_OP reads the final element through a signed
   __v8di view; the value is converted to unsigned long long by the
   return.  */
14346extern __inline unsigned long long
14347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14348_mm512_reduce_min_epu64 (__m512i __A)
14349{
14350 __MM512_REDUCE_OP (min_epu64);
14351}
14352
/* Return the largest of the eight packed unsigned 64-bit integers in
   __A.  __MM512_REDUCE_OP reads the final element through a signed
   __v8di view; the value is converted to unsigned long long by the
   return.  */
14353extern __inline unsigned long long
14354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14355_mm512_reduce_max_epu64 (__m512i __A)
14356{
14357 __MM512_REDUCE_OP (max_epu64);
14358}
14359
/* Masked unsigned minimum of the packed 64-bit integers in __A.  Lanes
   whose bit in __U is clear are replaced by ~0LL, i.e. all bits set,
   which is the largest value when a lane is compared as unsigned.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14360extern __inline unsigned long long
14361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14362_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
14363{
14364 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14365 __MM512_REDUCE_OP (min_epu64);
14366}
14367
/* Masked unsigned maximum of the packed 64-bit integers in __A.  The
   zero-masking move clears lanes whose bit in __U is clear; 0 is the
   smallest unsigned value, so those lanes cannot win the maximum.  The
   reduction and return statement come from __MM512_REDUCE_OP.  */
14368extern __inline unsigned long long
14369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14370_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
14371{
14372 __A = _mm512_maskz_mov_epi64 (__U, __A);
14373 __MM512_REDUCE_OP (max_epu64);
14374}
14375
/* Redefine the reduction helper for double-precision `+' and `*'.
   `op' is a C binary operator applied element-wise to vector operands.
   The expansion is a sequence of declarations ending in a return
   statement, so it must form the tail of a function body whose __m512d
   argument is named __A.  The two 256-bit halves of __A are combined,
   then the two 128-bit halves of that result, and finally the
   remaining two doubles as scalars.  The lanes are therefore combined
   pairwise as a tree, not sequentially in lane order.  */
14376#undef __MM512_REDUCE_OP
14377#define __MM512_REDUCE_OP(op) \
14378 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14379 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14380 __m256d __T3 = __T1 op __T2; \
14381 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14382 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14383 __m128d __T6 = __T4 op __T5; \
14384 return __T6[0] op __T6[1]
14385
/* Return the sum of the eight packed doubles in __A.  The body and its
   return statement come from __MM512_REDUCE_OP expanded with `+'; the
   additions are performed pairwise as a tree.  */
14386extern __inline double
14387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14388_mm512_reduce_add_pd (__m512d __A)
14389{
14390 __MM512_REDUCE_OP (+);
14391}
14392
/* Return the product of the eight packed doubles in __A.  The body and
   its return statement come from __MM512_REDUCE_OP expanded with `*';
   the multiplications are performed pairwise as a tree.  */
14393extern __inline double
14394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14395_mm512_reduce_mul_pd (__m512d __A)
14396{
14397 __MM512_REDUCE_OP (*);
14398}
14399
/* Masked sum of the packed doubles in __A.  The zero-masking move
   clears lanes whose bit in __U is clear, so they contribute 0 to the
   sum.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14400extern __inline double
14401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14402_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
14403{
14404 __A = _mm512_maskz_mov_pd (__U, __A);
14405 __MM512_REDUCE_OP (+);
14406}
14407
/* Masked product of the packed doubles in __A.  The merge-masking move
   uses a pass-through vector of 1.0, so lanes whose bit in __U is
   clear become the multiplicative identity.  The reduction and return
   statement come from __MM512_REDUCE_OP.  */
14408extern __inline double
14409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
14411{
14412 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
14413 __MM512_REDUCE_OP (*);
14414}
14415
/* Redefine the reduction helper for double-precision min/max.  `op' is
   an intrinsic name suffix (e.g. min_pd) that is token-pasted onto
   _mm256_ and _mm_.  The expansion is a sequence of declarations
   ending in a return statement, so it must form the tail of a function
   body whose __m512d argument is named __A.  Steps: combine the two
   256-bit halves of __A; combine the two 128-bit halves of that
   result; swap the two doubles (shuffle { 1, 0 }) and combine; return
   element 0.  */
14416#undef __MM512_REDUCE_OP
14417#define __MM512_REDUCE_OP(op) \
14418 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14419 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14420 __m256d __T3 = _mm256_##op (__T1, __T2); \
14421 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14422 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14423 __m128d __T6 = _mm_##op (__T4, __T5); \
14424 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
14425 __m128d __T8 = _mm_##op (__T6, __T7); \
14426 return __T8[0]
14427
/* Return the minimum of the eight packed doubles in __A, computed with
   the min_pd intrinsic family.  The body and its return statement come
   from __MM512_REDUCE_OP.  */
14428extern __inline double
14429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14430_mm512_reduce_min_pd (__m512d __A)
14431{
14432 __MM512_REDUCE_OP (min_pd);
14433}
14434
/* Return the maximum of the eight packed doubles in __A, computed with
   the max_pd intrinsic family.  The body and its return statement come
   from __MM512_REDUCE_OP.  */
14435extern __inline double
14436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14437_mm512_reduce_max_pd (__m512d __A)
14438{
14439 __MM512_REDUCE_OP (max_pd);
14440}
14441
/* Masked minimum of the packed doubles in __A.  Lanes whose bit in __U
   is clear are replaced by +infinity (__builtin_inf ()) through the
   merge-masking move, so they cannot be selected over a finite active
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14442extern __inline double
14443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14444_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
14445{
14446 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
14447 __MM512_REDUCE_OP (min_pd);
14448}
14449
/* Masked maximum of the packed doubles in __A.  Lanes whose bit in __U
   is clear are replaced by -infinity (-__builtin_inf ()) through the
   merge-masking move, so they cannot be selected over a finite active
   lane.  The reduction and return statement come from
   __MM512_REDUCE_OP.  */
14450extern __inline double
14451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14452_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
14453{
14454 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
14455 __MM512_REDUCE_OP (max_pd);
14456}
14457
/* Last use of the reduction helper: remove the macro so it is not
   visible to code that includes this header.  */
14458#undef __MM512_REDUCE_OP
14459
756c5857
AI
/* __DISABLE_AVX512F__ is defined at the top of this header only when
   __AVX512F__ was not already enabled and the header had to push
   target ("avx512f") itself.  In that case, undo the push here so the
   including translation unit gets its original target options back.  */
14460#ifdef __DISABLE_AVX512F__
14461#undef __DISABLE_AVX512F__
14462#pragma GCC pop_options
14463#endif /* __DISABLE_AVX512F__ */
14464
14465#endif /* _AVX512FINTRIN_H_INCLUDED */