]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
PR preprocessor/58580 - preprocessor goes OOM with warning for zero literals
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
23a5b65a 1/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
37/* Internal data types for implementing the intrinsics. */
38typedef double __v8df __attribute__ ((__vector_size__ (64)));
39typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41typedef int __v16si __attribute__ ((__vector_size__ (64)));
42
43/* The Intel API is flexible enough that we must allow aliasing with other
44 vector types, and their scalar components. */
45typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
46typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
47typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
48
49typedef unsigned char __mmask8;
50typedef unsigned short __mmask16;
51
52/* Rounding mode macros. */
53#define _MM_FROUND_TO_NEAREST_INT 0x00
54#define _MM_FROUND_TO_NEG_INF 0x01
55#define _MM_FROUND_TO_POS_INF 0x02
56#define _MM_FROUND_TO_ZERO 0x03
57#define _MM_FROUND_CUR_DIRECTION 0x04
58#define _MM_FROUND_NO_EXC 0x05
59
60extern __inline __m512i
61__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
62_mm512_set_epi64 (long long __A, long long __B, long long __C,
63 long long __D, long long __E, long long __F,
64 long long __G, long long __H)
65{
66 return __extension__ (__m512i) (__v8di)
67 { __H, __G, __F, __E, __D, __C, __B, __A };
68}
69
70/* Create the vector [A B C D E F G H I J K L M N O P]. */
71extern __inline __m512i
72__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
73_mm512_set_epi32 (int __A, int __B, int __C, int __D,
74 int __E, int __F, int __G, int __H,
75 int __I, int __J, int __K, int __L,
76 int __M, int __N, int __O, int __P)
77{
78 return __extension__ (__m512i)(__v16si)
79 { __P, __O, __N, __M, __L, __K, __J, __I,
80 __H, __G, __F, __E, __D, __C, __B, __A };
81}
82
83extern __inline __m512d
84__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
85_mm512_set_pd (double __A, double __B, double __C, double __D,
86 double __E, double __F, double __G, double __H)
87{
88 return __extension__ (__m512d)
89 { __H, __G, __F, __E, __D, __C, __B, __A };
90}
91
92extern __inline __m512
93__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
94_mm512_set_ps (float __A, float __B, float __C, float __D,
95 float __E, float __F, float __G, float __H,
96 float __I, float __J, float __K, float __L,
97 float __M, float __N, float __O, float __P)
98{
99 return __extension__ (__m512)
100 { __P, __O, __N, __M, __L, __K, __J, __I,
101 __H, __G, __F, __E, __D, __C, __B, __A };
102}
103
/* _mm512_set_epi64 with the eight arguments supplied in the opposite
   order.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)
106
/* _mm512_set_epi32 with the sixteen arguments supplied in the opposite
   order.  */
#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7,           \
                          e8, e9, e10, e11, e12, e13, e14, e15)     \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8,           \
                    e7, e6, e5, e4, e3, e2, e1, e0)
110
/* _mm512_set_pd with the eight arguments supplied in the opposite
   order.  */
#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)
113
/* _mm512_set_ps with the sixteen arguments supplied in the opposite
   order.  */
#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7,              \
                       e8, e9, e10, e11, e12, e13, e14, e15)        \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8,              \
                 e7, e6, e5, e4, e3, e2, e1, e0)
116
117extern __inline __m512
118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119_mm512_setzero_ps (void)
120{
121 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
122 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
123}
124
125extern __inline __m512d
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm512_setzero_pd (void)
128{
129 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
130}
131
132extern __inline __m512i
133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
134_mm512_setzero_si512 (void)
135{
136 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
137}
138
139extern __inline __m512d
140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
142{
143 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
144 (__v8df) __W,
145 (__mmask8) __U);
146}
147
148extern __inline __m512d
149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
150_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
151{
152 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
153 (__v8df)
154 _mm512_setzero_pd (),
155 (__mmask8) __U);
156}
157
158extern __inline __m512
159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
160_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
161{
162 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
163 (__v16sf) __W,
164 (__mmask16) __U);
165}
166
167extern __inline __m512
168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
170{
171 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
172 (__v16sf)
173 _mm512_setzero_ps (),
174 (__mmask16) __U);
175}
176
177extern __inline __m512d
178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179_mm512_load_pd (void const *__P)
180{
181 return *(__m512d *) __P;
182}
183
184extern __inline __m512d
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
187{
188 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
189 (__v8df) __W,
190 (__mmask8) __U);
191}
192
193extern __inline __m512d
194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
195_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
196{
197 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
198 (__v8df)
199 _mm512_setzero_pd (),
200 (__mmask8) __U);
201}
202
203extern __inline void
204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
205_mm512_store_pd (void *__P, __m512d __A)
206{
207 *(__m512d *) __P = __A;
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
213{
214 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
215 (__mmask8) __U);
216}
217
218extern __inline __m512
219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220_mm512_load_ps (void const *__P)
221{
222 return *(__m512 *) __P;
223}
224
225extern __inline __m512
226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
227_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
228{
229 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
230 (__v16sf) __W,
231 (__mmask16) __U);
232}
233
234extern __inline __m512
235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
236_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
237{
238 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
239 (__v16sf)
240 _mm512_setzero_ps (),
241 (__mmask16) __U);
242}
243
244extern __inline void
245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
246_mm512_store_ps (void *__P, __m512 __A)
247{
248 *(__m512 *) __P = __A;
249}
250
251extern __inline void
252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
253_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
254{
255 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
256 (__mmask16) __U);
257}
258
259extern __inline __m512i
260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
262{
263 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
264 (__v8di) __W,
265 (__mmask8) __U);
266}
267
268extern __inline __m512i
269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
271{
272 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
273 (__v8di)
274 _mm512_setzero_si512 (),
275 (__mmask8) __U);
276}
277
278extern __inline __m512i
279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280_mm512_load_epi64 (void const *__P)
281{
282 return *(__m512i *) __P;
283}
284
285extern __inline __m512i
286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
288{
289 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
290 (__v8di) __W,
291 (__mmask8) __U);
292}
293
294extern __inline __m512i
295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
296_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
297{
298 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
299 (__v8di)
300 _mm512_setzero_si512 (),
301 (__mmask8) __U);
302}
303
304extern __inline void
305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
306_mm512_store_epi64 (void *__P, __m512i __A)
307{
308 *(__m512i *) __P = __A;
309}
310
311extern __inline void
312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
314{
315 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
316 (__mmask8) __U);
317}
318
319extern __inline __m512i
320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
321_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
322{
323 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
324 (__v16si) __W,
325 (__mmask16) __U);
326}
327
328extern __inline __m512i
329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
331{
332 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
333 (__v16si)
334 _mm512_setzero_si512 (),
335 (__mmask16) __U);
336}
337
338extern __inline __m512i
339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
340_mm512_load_si512 (void const *__P)
341{
342 return *(__m512i *) __P;
343}
344
345extern __inline __m512i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm512_load_epi32 (void const *__P)
348{
349 return *(__m512i *) __P;
350}
351
352extern __inline __m512i
353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
354_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
355{
356 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
357 (__v16si) __W,
358 (__mmask16) __U);
359}
360
361extern __inline __m512i
362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
363_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
364{
365 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
366 (__v16si)
367 _mm512_setzero_si512 (),
368 (__mmask16) __U);
369}
370
371extern __inline void
372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373_mm512_store_si512 (void *__P, __m512i __A)
374{
375 *(__m512i *) __P = __A;
376}
377
378extern __inline void
379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380_mm512_store_epi32 (void *__P, __m512i __A)
381{
382 *(__m512i *) __P = __A;
383}
384
385extern __inline void
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
388{
389 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
390 (__mmask16) __U);
391}
392
393extern __inline __m512i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_mullo_epi32 (__m512i __A, __m512i __B)
396{
397 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
398 (__v16si) __B,
399 (__v16si)
400 _mm512_setzero_si512 (),
401 (__mmask16) -1);
402}
403
404extern __inline __m512i
405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
406_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
407{
408 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
409 (__v16si) __B,
410 (__v16si)
411 _mm512_setzero_si512 (),
412 __M);
413}
414
415extern __inline __m512i
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
418{
419 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
420 (__v16si) __B,
421 (__v16si) __W, __M);
422}
423
424extern __inline __m512i
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
427{
428 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
429 (__v16si) __Y,
430 (__v16si)
431 _mm512_setzero_si512 (),
432 (__mmask16) -1);
433}
434
435extern __inline __m512i
436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
437_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
438{
439 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
440 (__v16si) __Y,
441 (__v16si) __W,
442 (__mmask16) __U);
443}
444
445extern __inline __m512i
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
448{
449 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
450 (__v16si) __Y,
451 (__v16si)
452 _mm512_setzero_si512 (),
453 (__mmask16) __U);
454}
455
456extern __inline __m512i
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm512_srav_epi32 (__m512i __X, __m512i __Y)
459{
460 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
461 (__v16si) __Y,
462 (__v16si)
463 _mm512_setzero_si512 (),
464 (__mmask16) -1);
465}
466
467extern __inline __m512i
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
470{
471 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
472 (__v16si) __Y,
473 (__v16si) __W,
474 (__mmask16) __U);
475}
476
477extern __inline __m512i
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
480{
481 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
482 (__v16si) __Y,
483 (__v16si)
484 _mm512_setzero_si512 (),
485 (__mmask16) __U);
486}
487
488extern __inline __m512i
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
491{
492 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
493 (__v16si) __Y,
494 (__v16si)
495 _mm512_setzero_si512 (),
496 (__mmask16) -1);
497}
498
499extern __inline __m512i
500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
502{
503 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
504 (__v16si) __Y,
505 (__v16si) __W,
506 (__mmask16) __U);
507}
508
509extern __inline __m512i
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
512{
513 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
514 (__v16si) __Y,
515 (__v16si)
516 _mm512_setzero_si512 (),
517 (__mmask16) __U);
518}
519
520extern __inline __m512i
521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
522_mm512_add_epi64 (__m512i __A, __m512i __B)
523{
524 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
525 (__v8di) __B,
526 (__v8di)
527 _mm512_setzero_si512 (),
528 (__mmask8) -1);
529}
530
531extern __inline __m512i
532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
534{
535 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
536 (__v8di) __B,
537 (__v8di) __W,
538 (__mmask8) __U);
539}
540
541extern __inline __m512i
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
544{
545 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
546 (__v8di) __B,
547 (__v8di)
548 _mm512_setzero_si512 (),
549 (__mmask8) __U);
550}
551
552extern __inline __m512i
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554_mm512_sub_epi64 (__m512i __A, __m512i __B)
555{
556 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
557 (__v8di) __B,
558 (__v8di)
559 _mm512_setzero_pd (),
560 (__mmask8) -1);
561}
562
563extern __inline __m512i
564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
566{
567 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
568 (__v8di) __B,
569 (__v8di) __W,
570 (__mmask8) __U);
571}
572
573extern __inline __m512i
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
576{
577 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
578 (__v8di) __B,
579 (__v8di)
580 _mm512_setzero_si512 (),
581 (__mmask8) __U);
582}
583
584extern __inline __m512i
585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
587{
588 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
589 (__v8di) __Y,
590 (__v8di)
591 _mm512_setzero_pd (),
592 (__mmask8) -1);
593}
594
595extern __inline __m512i
596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
598{
599 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
600 (__v8di) __Y,
601 (__v8di) __W,
602 (__mmask8) __U);
603}
604
605extern __inline __m512i
606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
608{
609 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
610 (__v8di) __Y,
611 (__v8di)
612 _mm512_setzero_si512 (),
613 (__mmask8) __U);
614}
615
616extern __inline __m512i
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm512_srav_epi64 (__m512i __X, __m512i __Y)
619{
620 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
621 (__v8di) __Y,
622 (__v8di)
623 _mm512_setzero_si512 (),
624 (__mmask8) -1);
625}
626
627extern __inline __m512i
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
630{
631 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
632 (__v8di) __Y,
633 (__v8di) __W,
634 (__mmask8) __U);
635}
636
637extern __inline __m512i
638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
639_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
640{
641 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
642 (__v8di) __Y,
643 (__v8di)
644 _mm512_setzero_si512 (),
645 (__mmask8) __U);
646}
647
648extern __inline __m512i
649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
651{
652 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
653 (__v8di) __Y,
654 (__v8di)
655 _mm512_setzero_si512 (),
656 (__mmask8) -1);
657}
658
659extern __inline __m512i
660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
661_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
662{
663 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
664 (__v8di) __Y,
665 (__v8di) __W,
666 (__mmask8) __U);
667}
668
669extern __inline __m512i
670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
672{
673 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
674 (__v8di) __Y,
675 (__v8di)
676 _mm512_setzero_si512 (),
677 (__mmask8) __U);
678}
679
680extern __inline __m512i
681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
682_mm512_add_epi32 (__m512i __A, __m512i __B)
683{
684 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
685 (__v16si) __B,
686 (__v16si)
687 _mm512_setzero_si512 (),
688 (__mmask16) -1);
689}
690
691extern __inline __m512i
692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
693_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
694{
695 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
696 (__v16si) __B,
697 (__v16si) __W,
698 (__mmask16) __U);
699}
700
701extern __inline __m512i
702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
704{
705 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
706 (__v16si) __B,
707 (__v16si)
708 _mm512_setzero_si512 (),
709 (__mmask16) __U);
710}
711
712extern __inline __m512i
713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
714_mm512_mul_epi32 (__m512i __X, __m512i __Y)
715{
716 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
717 (__v16si) __Y,
718 (__v8di)
719 _mm512_setzero_si512 (),
720 (__mmask8) -1);
721}
722
723extern __inline __m512i
724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
726{
727 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
728 (__v16si) __Y,
729 (__v8di) __W, __M);
730}
731
732extern __inline __m512i
733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
734_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
735{
736 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
737 (__v16si) __Y,
738 (__v8di)
739 _mm512_setzero_si512 (),
740 __M);
741}
742
743extern __inline __m512i
744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
745_mm512_sub_epi32 (__m512i __A, __m512i __B)
746{
747 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
748 (__v16si) __B,
749 (__v16si)
750 _mm512_setzero_si512 (),
751 (__mmask16) -1);
752}
753
754extern __inline __m512i
755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
756_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
757{
758 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
759 (__v16si) __B,
760 (__v16si) __W,
761 (__mmask16) __U);
762}
763
764extern __inline __m512i
765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
767{
768 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
769 (__v16si) __B,
770 (__v16si)
771 _mm512_setzero_si512 (),
772 (__mmask16) __U);
773}
774
775extern __inline __m512i
776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777_mm512_mul_epu32 (__m512i __X, __m512i __Y)
778{
779 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
780 (__v16si) __Y,
781 (__v8di)
782 _mm512_setzero_si512 (),
783 (__mmask8) -1);
784}
785
786extern __inline __m512i
787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
789{
790 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
791 (__v16si) __Y,
792 (__v8di) __W, __M);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
798{
799 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
800 (__v16si) __Y,
801 (__v8di)
802 _mm512_setzero_si512 (),
803 __M);
804}
805
/* _mm512_slli_epi64 and its mask/maskz forms, all built on
   __builtin_ia32_psllqi512_mask.  They are inline functions when
   __OPTIMIZE__ is defined and macros otherwise (presumably because the
   builtin needs the count as a compile-time constant -- confirm against
   the GCC builtin documentation).  */
#ifdef __OPTIMIZE__
/* Unmasked form: every mask bit set, all-zero third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __old = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __old, (__mmask8) __U);
}

/* Zero-operand masked form: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)                                         \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)(W),                                               \
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)                                \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)(U)))
#endif
852
853extern __inline __m512i
854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
855_mm512_sll_epi64 (__m512i __A, __m128i __B)
856{
857 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
858 (__v2di) __B,
859 (__v8di)
860 _mm512_setzero_si512 (),
861 (__mmask8) -1);
862}
863
864extern __inline __m512i
865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
867{
868 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
869 (__v2di) __B,
870 (__v8di) __W,
871 (__mmask8) __U);
872}
873
874extern __inline __m512i
875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
876_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
877{
878 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
879 (__v2di) __B,
880 (__v8di)
881 _mm512_setzero_si512 (),
882 (__mmask8) __U);
883}
884
/* _mm512_srli_epi64 and its mask/maskz forms, all built on
   __builtin_ia32_psrlqi512_mask.  They are inline functions when
   __OPTIMIZE__ is defined and macros otherwise (presumably because the
   builtin needs the count as a compile-time constant -- confirm against
   the GCC builtin documentation).  */
#ifdef __OPTIMIZE__
/* Unmasked form: every mask bit set, all-zero third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __old = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __old, (__mmask8) __U);
}

/* Zero-operand masked form: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C)                                         \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)(W),                                               \
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)                                \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)(U)))
#endif
931
932extern __inline __m512i
933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934_mm512_srl_epi64 (__m512i __A, __m128i __B)
935{
936 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
937 (__v2di) __B,
938 (__v8di)
939 _mm512_setzero_si512 (),
940 (__mmask8) -1);
941}
942
943extern __inline __m512i
944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
945_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
946{
947 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
948 (__v2di) __B,
949 (__v8di) __W,
950 (__mmask8) __U);
951}
952
953extern __inline __m512i
954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
956{
957 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
958 (__v2di) __B,
959 (__v8di)
960 _mm512_setzero_si512 (),
961 (__mmask8) __U);
962}
963
/* _mm512_srai_epi64 and its mask/maskz forms, all built on
   __builtin_ia32_psraqi512_mask.  They are inline functions when
   __OPTIMIZE__ is defined and macros otherwise (presumably because the
   builtin needs the count as a compile-time constant -- confirm against
   the GCC builtin documentation).  */
#ifdef __OPTIMIZE__
/* Unmasked form: every mask bit set, all-zero third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __old = (__v8di) __W;

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __old, (__mmask8) __U);
}

/* Zero-operand masked form: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C)                                         \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)(W),                                               \
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)                                \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),       \
    (int)(C),                                                           \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)(U)))
#endif
1010
1011extern __inline __m512i
1012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013_mm512_sra_epi64 (__m512i __A, __m128i __B)
1014{
1015 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1016 (__v2di) __B,
1017 (__v8di)
1018 _mm512_setzero_si512 (),
1019 (__mmask8) -1);
1020}
1021
1022extern __inline __m512i
1023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1025{
1026 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1027 (__v2di) __B,
1028 (__v8di) __W,
1029 (__mmask8) __U);
1030}
1031
1032extern __inline __m512i
1033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1034_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1035{
1036 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1037 (__v2di) __B,
1038 (__v8di)
1039 _mm512_setzero_si512 (),
1040 (__mmask8) __U);
1041}
1042
1043#ifdef __OPTIMIZE__
1044extern __inline __m512i
1045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046_mm512_slli_epi32 (__m512i __A, unsigned int __B)
1047{
1048 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1049 (__v16si)
1050 _mm512_setzero_si512 (),
1051 (__mmask16) -1);
1052}
1053
1054extern __inline __m512i
1055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1056_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1057 unsigned int __B)
1058{
1059 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1060 (__v16si) __W,
1061 (__mmask16) __U);
1062}
1063
1064extern __inline __m512i
1065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1067{
1068 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1069 (__v16si)
1070 _mm512_setzero_si512 (),
1071 (__mmask16) __U);
1072}
1073#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pslldi512_mask with a zeroed vector and an all-ones mask.  */
#define _mm512_slli_epi32(V, N) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) -1))
1078
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pslldi512_mask with PREV before the mask and K as the mask.  */
#define _mm512_mask_slli_epi32(PREV, K, V, N) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) (PREV), \
                                  (__mmask16) (K)))
1083
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pslldi512_mask with a zeroed vector before the mask K.  */
#define _mm512_maskz_slli_epi32(K, V, N) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) (K)))
1088#endif
1089
1090extern __inline __m512i
1091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092_mm512_sll_epi32 (__m512i __A, __m128i __B)
1093{
1094 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1095 (__v4si) __B,
1096 (__v16si)
1097 _mm512_setzero_si512 (),
1098 (__mmask16) -1);
1099}
1100
1101extern __inline __m512i
1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1104{
1105 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1106 (__v4si) __B,
1107 (__v16si) __W,
1108 (__mmask16) __U);
1109}
1110
1111extern __inline __m512i
1112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1114{
1115 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1116 (__v4si) __B,
1117 (__v16si)
1118 _mm512_setzero_si512 (),
1119 (__mmask16) __U);
1120}
1121
1122#ifdef __OPTIMIZE__
1123extern __inline __m512i
1124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125_mm512_srli_epi32 (__m512i __A, unsigned int __B)
1126{
1127 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1128 (__v16si)
1129 _mm512_setzero_si512 (),
1130 (__mmask16) -1);
1131}
1132
1133extern __inline __m512i
1134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1135_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1136 __m512i __A, unsigned int __B)
1137{
1138 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1139 (__v16si) __W,
1140 (__mmask16) __U);
1141}
1142
1143extern __inline __m512i
1144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1145_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1146{
1147 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1148 (__v16si)
1149 _mm512_setzero_si512 (),
1150 (__mmask16) __U);
1151}
1152#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psrldi512_mask with a zeroed vector and an all-ones mask.  */
#define _mm512_srli_epi32(V, N) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) -1))
1157
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psrldi512_mask with PREV before the mask and K as the mask.  */
#define _mm512_mask_srli_epi32(PREV, K, V, N) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) (PREV), \
                                  (__mmask16) (K)))
1162
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psrldi512_mask with a zeroed vector before the mask K.  */
#define _mm512_maskz_srli_epi32(K, V, N) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) (K)))
1167#endif
1168
1169extern __inline __m512i
1170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171_mm512_srl_epi32 (__m512i __A, __m128i __B)
1172{
1173 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1174 (__v4si) __B,
1175 (__v16si)
1176 _mm512_setzero_si512 (),
1177 (__mmask16) -1);
1178}
1179
1180extern __inline __m512i
1181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1183{
1184 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1185 (__v4si) __B,
1186 (__v16si) __W,
1187 (__mmask16) __U);
1188}
1189
1190extern __inline __m512i
1191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1192_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1193{
1194 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1195 (__v4si) __B,
1196 (__v16si)
1197 _mm512_setzero_si512 (),
1198 (__mmask16) __U);
1199}
1200
1201#ifdef __OPTIMIZE__
1202extern __inline __m512i
1203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204_mm512_srai_epi32 (__m512i __A, unsigned int __B)
1205{
1206 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1207 (__v16si)
1208 _mm512_setzero_si512 (),
1209 (__mmask16) -1);
1210}
1211
1212extern __inline __m512i
1213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1215 unsigned int __B)
1216{
1217 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1218 (__v16si) __W,
1219 (__mmask16) __U);
1220}
1221
1222extern __inline __m512i
1223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1224_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1225{
1226 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1227 (__v16si)
1228 _mm512_setzero_si512 (),
1229 (__mmask16) __U);
1230}
1231#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psradi512_mask with a zeroed vector and an all-ones mask.  */
#define _mm512_srai_epi32(V, N) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) -1))
1236
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psradi512_mask with PREV before the mask and K as the mask.  */
#define _mm512_mask_srai_epi32(PREV, K, V, N) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) (PREV), \
                                  (__mmask16) (K)))
1241
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_psradi512_mask with a zeroed vector before the mask K.  */
#define _mm512_maskz_srai_epi32(K, V, N) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), (int) (N), \
                                  (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                  (__mmask16) (K)))
1246#endif
1247
1248extern __inline __m512i
1249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250_mm512_sra_epi32 (__m512i __A, __m128i __B)
1251{
1252 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1253 (__v4si) __B,
1254 (__v16si)
1255 _mm512_setzero_si512 (),
1256 (__mmask16) -1);
1257}
1258
1259extern __inline __m512i
1260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1261_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1262{
1263 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1264 (__v4si) __B,
1265 (__v16si) __W,
1266 (__mmask16) __U);
1267}
1268
1269extern __inline __m512i
1270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1271_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1272{
1273 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1274 (__v4si) __B,
1275 (__v16si)
1276 _mm512_setzero_si512 (),
1277 (__mmask16) __U);
1278}
1279
075691af
AI
1280#ifdef __OPTIMIZE__
1281extern __inline __m128d
1282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1284{
1285 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1286 (__v2df) __B,
1287 __R);
1288}
1289
1290extern __inline __m128
1291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1293{
1294 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1295 (__v4sf) __B,
1296 __R);
1297}
1298
1299extern __inline __m128d
1300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1301_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1302{
1303 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1304 (__v2df) __B,
1305 __R);
1306}
1307
1308extern __inline __m128
1309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1311{
1312 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1313 (__v4sf) __B,
1314 __R);
1315}
1316
1317#else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion and
   every argument are parenthesized, and the arguments are cast the same way
   the inline version casts them, so the macro composes safely inside larger
   expressions (e.g. when followed by a postfix operator).  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df) (__m128d) (A), \
                                         (__v2df) (__m128d) (B), \
                                         (int) (C)))
1320
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion and
   every argument are parenthesized, and the arguments are cast the same way
   the inline version casts them, so the macro composes safely inside larger
   expressions (e.g. when followed by a postfix operator).  */
#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf) (__m128) (A), \
                                        (__v4sf) (__m128) (B), \
                                        (int) (C)))
1323
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion and
   every argument are parenthesized, and the arguments are cast the same way
   the inline version casts them, so the macro composes safely inside larger
   expressions (e.g. when followed by a postfix operator).  */
#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df) (__m128d) (A), \
                                         (__v2df) (__m128d) (B), \
                                         (int) (C)))
1326
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion and
   every argument are parenthesized, and the arguments are cast the same way
   the inline version casts them, so the macro composes safely inside larger
   expressions (e.g. when followed by a postfix operator).  */
#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf) (__m128) (A), \
                                        (__v4sf) (__m128) (B), \
                                        (int) (C)))
1329#endif
1330
756c5857
AI
1331#ifdef __OPTIMIZE__
1332extern __inline __m512i
1333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1334_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1335{
1336 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1337 (__v8di) __B,
1338 (__v8di) __C, imm,
1339 (__mmask8) -1);
1340}
1341
1342extern __inline __m512i
1343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1344_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1345 __m512i __C, const int imm)
1346{
1347 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1348 (__v8di) __B,
1349 (__v8di) __C, imm,
1350 (__mmask8) __U);
1351}
1352
1353extern __inline __m512i
1354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1356 __m512i __C, const int imm)
1357{
1358 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1359 (__v8di) __B,
1360 (__v8di) __C,
1361 imm, (__mmask8) __U);
1362}
1363
1364extern __inline __m512i
1365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1367{
1368 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1369 (__v16si) __B,
1370 (__v16si) __C,
1371 imm, (__mmask16) -1);
1372}
1373
1374extern __inline __m512i
1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1377 __m512i __C, const int imm)
1378{
1379 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1380 (__v16si) __B,
1381 (__v16si) __C,
1382 imm, (__mmask16) __U);
1383}
1384
1385extern __inline __m512i
1386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1387_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1388 __m512i __C, const int imm)
1389{
1390 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1391 (__v16si) __B,
1392 (__v16si) __C,
1393 imm, (__mmask16) __U);
1394}
1395#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogq512_mask with an all-ones mask.  */
#define _mm512_ternarylogic_epi64(X, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
                                     (__v8di) (__m512i) (Y), \
                                     (__v8di) (__m512i) (Z), \
                                     (int) (IMM), (__mmask8) -1))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogq512_mask with K as the mask.  */
#define _mm512_mask_ternarylogic_epi64(X, K, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
                                     (__v8di) (__m512i) (Y), \
                                     (__v8di) (__m512i) (Z), \
                                     (int) (IMM), (__mmask8) (K)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to the _maskz
   builtin __builtin_ia32_pternlogq512_maskz with K as the mask.  */
#define _mm512_maskz_ternarylogic_epi64(K, X, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (X), \
                                      (__v8di) (__m512i) (Y), \
                                      (__v8di) (__m512i) (Z), \
                                      (int) (IMM), (__mmask8) (K)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogd512_mask with an all-ones mask.  */
#define _mm512_ternarylogic_epi32(X, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
                                     (__v16si) (__m512i) (Y), \
                                     (__v16si) (__m512i) (Z), \
                                     (int) (IMM), (__mmask16) -1))
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_pternlogd512_mask with K as the mask.  */
#define _mm512_mask_ternarylogic_epi32(X, K, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
                                     (__v16si) (__m512i) (Y), \
                                     (__v16si) (__m512i) (Z), \
                                     (int) (IMM), (__mmask16) (K)))
/* Macro form used when __OPTIMIZE__ is not defined: expands to the _maskz
   builtin __builtin_ia32_pternlogd512_maskz with K as the mask.  */
#define _mm512_maskz_ternarylogic_epi32(K, X, Y, Z, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (X), \
                                      (__v16si) (__m512i) (Y), \
                                      (__v16si) (__m512i) (Z), \
                                      (int) (IMM), (__mmask16) (K)))
1417#endif
1418
1419extern __inline __m512d
1420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1421_mm512_rcp14_pd (__m512d __A)
1422{
1423 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1424 (__v8df)
1425 _mm512_setzero_pd (),
1426 (__mmask8) -1);
1427}
1428
1429extern __inline __m512d
1430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1432{
1433 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1434 (__v8df) __W,
1435 (__mmask8) __U);
1436}
1437
1438extern __inline __m512d
1439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1440_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1441{
1442 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1443 (__v8df)
1444 _mm512_setzero_pd (),
1445 (__mmask8) __U);
1446}
1447
1448extern __inline __m512
1449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1450_mm512_rcp14_ps (__m512 __A)
1451{
1452 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1453 (__v16sf)
1454 _mm512_setzero_ps (),
1455 (__mmask16) -1);
1456}
1457
1458extern __inline __m512
1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1461{
1462 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1463 (__v16sf) __W,
1464 (__mmask16) __U);
1465}
1466
1467extern __inline __m512
1468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1469_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1470{
1471 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1472 (__v16sf)
1473 _mm512_setzero_ps (),
1474 (__mmask16) __U);
1475}
1476
075691af
AI
1477extern __inline __m128d
1478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1479_mm_rcp14_sd (__m128d __A, __m128d __B)
1480{
1481 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
1482 (__v2df) __B);
1483}
1484
1485extern __inline __m128
1486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487_mm_rcp14_ss (__m128 __A, __m128 __B)
1488{
1489 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
1490 (__v4sf) __B);
1491}
1492
756c5857
AI
1493extern __inline __m512d
1494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495_mm512_rsqrt14_pd (__m512d __A)
1496{
1497 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1498 (__v8df)
1499 _mm512_setzero_pd (),
1500 (__mmask8) -1);
1501}
1502
1503extern __inline __m512d
1504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1505_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1506{
1507 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1508 (__v8df) __W,
1509 (__mmask8) __U);
1510}
1511
1512extern __inline __m512d
1513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1514_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1515{
1516 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1517 (__v8df)
1518 _mm512_setzero_pd (),
1519 (__mmask8) __U);
1520}
1521
1522extern __inline __m512
1523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1524_mm512_rsqrt14_ps (__m512 __A)
1525{
1526 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1527 (__v16sf)
1528 _mm512_setzero_ps (),
1529 (__mmask16) -1);
1530}
1531
1532extern __inline __m512
1533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1534_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1535{
1536 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1537 (__v16sf) __W,
1538 (__mmask16) __U);
1539}
1540
1541extern __inline __m512
1542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1544{
1545 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1546 (__v16sf)
1547 _mm512_setzero_ps (),
1548 (__mmask16) __U);
1549}
1550
075691af
AI
1551extern __inline __m128d
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1554{
1555 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
1556 (__v2df) __B);
1557}
1558
1559extern __inline __m128
1560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1562{
1563 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
1564 (__v4sf) __B);
1565}
1566
756c5857
AI
1567#ifdef __OPTIMIZE__
1568extern __inline __m512d
1569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570_mm512_sqrt_round_pd (__m512d __A, const int __R)
1571{
1572 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1573 (__v8df)
1574 _mm512_setzero_pd (),
1575 (__mmask8) -1, __R);
1576}
1577
1578extern __inline __m512d
1579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1580_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1581 const int __R)
1582{
1583 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1584 (__v8df) __W,
1585 (__mmask8) __U, __R);
1586}
1587
1588extern __inline __m512d
1589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1591{
1592 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1593 (__v8df)
1594 _mm512_setzero_pd (),
1595 (__mmask8) __U, __R);
1596}
1597
1598extern __inline __m512
1599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600_mm512_sqrt_round_ps (__m512 __A, const int __R)
1601{
1602 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1603 (__v16sf)
1604 _mm512_setzero_ps (),
1605 (__mmask16) -1, __R);
1606}
1607
1608extern __inline __m512
1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1611{
1612 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1613 (__v16sf) __W,
1614 (__mmask16) __U, __R);
1615}
1616
1617extern __inline __m512
1618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1620{
1621 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1622 (__v16sf)
1623 _mm512_setzero_ps (),
1624 (__mmask16) __U, __R);
1625}
1626
075691af
AI
1627extern __inline __m128d
1628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1630{
1631 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1632 (__v2df) __A,
1633 __R);
1634}
1635
1636extern __inline __m128
1637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1638_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1639{
1640 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1641 (__v4sf) __A,
1642 __R);
1643}
756c5857
AI
1644#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version (including a
   typed all-ones mask) so the macro composes safely inside larger
   expressions.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
                                            (__v8df) _mm512_setzero_pd (), \
                                            (__mmask8) -1, (int) (C)))
1647
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version so the macro
   composes safely inside larger expressions.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
                                            (__v8df) (__m512d) (W), \
                                            (__mmask8) (U), (int) (C)))
1650
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version so the macro
   composes safely inside larger expressions.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
                                            (__v8df) _mm512_setzero_pd (), \
                                            (__mmask8) (U), (int) (C)))
1653
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version (including a
   typed all-ones mask) so the macro composes safely inside larger
   expressions.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
                                           (__v16sf) _mm512_setzero_ps (), \
                                           (__mmask16) -1, (int) (C)))
1656
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version so the macro
   composes safely inside larger expressions.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
                                           (__v16sf) (__m512) (W), \
                                           (__mmask16) (U), (int) (C)))
1659
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion and every
   argument are parenthesized and cast like the inline version so the macro
   composes safely inside larger expressions.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
                                           (__v16sf) _mm512_setzero_ps (), \
                                           (__mmask16) (U), (int) (C)))
075691af
AI
1662
/* Macro form used when __OPTIMIZE__ is not defined.  The builtin receives B
   first and A second, matching the inline definition of this intrinsic, so
   optimized and unoptimized builds compute the same result.  The expansion
   and arguments are parenthesized and cast like the inline version.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df) (__m128d) (B), \
                                          (__v2df) (__m128d) (A), \
                                          (int) (C)))
1665
/* Macro form used when __OPTIMIZE__ is not defined.  The builtin receives B
   first and A second, matching the inline definition of this intrinsic, so
   optimized and unoptimized builds compute the same result.  The expansion
   and arguments are parenthesized and cast like the inline version.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf) (__m128) (B), \
                                         (__v4sf) (__m128) (A), \
                                         (int) (C)))
756c5857
AI
1668#endif
1669
1670extern __inline __m512i
1671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1672_mm512_cvtepi8_epi32 (__m128i __A)
1673{
1674 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1675 (__v16si)
1676 _mm512_setzero_si512 (),
1677 (__mmask16) -1);
1678}
1679
1680extern __inline __m512i
1681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1682_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1683{
1684 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1685 (__v16si) __W,
1686 (__mmask16) __U);
1687}
1688
1689extern __inline __m512i
1690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1691_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1692{
1693 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1694 (__v16si)
1695 _mm512_setzero_si512 (),
1696 (__mmask16) __U);
1697}
1698
1699extern __inline __m512i
1700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1701_mm512_cvtepi8_epi64 (__m128i __A)
1702{
1703 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1704 (__v8di)
1705 _mm512_setzero_si512 (),
1706 (__mmask8) -1);
1707}
1708
1709extern __inline __m512i
1710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1711_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1712{
1713 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1714 (__v8di) __W,
1715 (__mmask8) __U);
1716}
1717
1718extern __inline __m512i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1721{
1722 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1723 (__v8di)
1724 _mm512_setzero_si512 (),
1725 (__mmask8) __U);
1726}
1727
1728extern __inline __m512i
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm512_cvtepi16_epi32 (__m256i __A)
1731{
1732 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1733 (__v16si)
1734 _mm512_setzero_si512 (),
1735 (__mmask16) -1);
1736}
1737
1738extern __inline __m512i
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1741{
1742 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1743 (__v16si) __W,
1744 (__mmask16) __U);
1745}
1746
1747extern __inline __m512i
1748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1750{
1751 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1752 (__v16si)
1753 _mm512_setzero_si512 (),
1754 (__mmask16) __U);
1755}
1756
1757extern __inline __m512i
1758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1759_mm512_cvtepi16_epi64 (__m128i __A)
1760{
1761 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1762 (__v8di)
1763 _mm512_setzero_si512 (),
1764 (__mmask8) -1);
1765}
1766
1767extern __inline __m512i
1768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1769_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1770{
1771 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1772 (__v8di) __W,
1773 (__mmask8) __U);
1774}
1775
1776extern __inline __m512i
1777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1779{
1780 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1781 (__v8di)
1782 _mm512_setzero_si512 (),
1783 (__mmask8) __U);
1784}
1785
1786extern __inline __m512i
1787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788_mm512_cvtepi32_epi64 (__m256i __X)
1789{
1790 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1791 (__v8di)
1792 _mm512_setzero_si512 (),
1793 (__mmask8) -1);
1794}
1795
1796extern __inline __m512i
1797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1799{
1800 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1801 (__v8di) __W,
1802 (__mmask8) __U);
1803}
1804
1805extern __inline __m512i
1806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1807_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1808{
1809 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1810 (__v8di)
1811 _mm512_setzero_si512 (),
1812 (__mmask8) __U);
1813}
1814
1815extern __inline __m512i
1816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817_mm512_cvtepu8_epi32 (__m128i __A)
1818{
1819 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1820 (__v16si)
1821 _mm512_setzero_si512 (),
1822 (__mmask16) -1);
1823}
1824
1825extern __inline __m512i
1826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1827_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1828{
1829 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1830 (__v16si) __W,
1831 (__mmask16) __U);
1832}
1833
1834extern __inline __m512i
1835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1837{
1838 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1839 (__v16si)
1840 _mm512_setzero_si512 (),
1841 (__mmask16) __U);
1842}
1843
1844extern __inline __m512i
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm512_cvtepu8_epi64 (__m128i __A)
1847{
1848 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1849 (__v8di)
1850 _mm512_setzero_si512 (),
1851 (__mmask8) -1);
1852}
1853
1854extern __inline __m512i
1855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1857{
1858 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1859 (__v8di) __W,
1860 (__mmask8) __U);
1861}
1862
1863extern __inline __m512i
1864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1865_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1866{
1867 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1868 (__v8di)
1869 _mm512_setzero_si512 (),
1870 (__mmask8) __U);
1871}
1872
1873extern __inline __m512i
1874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1875_mm512_cvtepu16_epi32 (__m256i __A)
1876{
1877 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1878 (__v16si)
1879 _mm512_setzero_si512 (),
1880 (__mmask16) -1);
1881}
1882
1883extern __inline __m512i
1884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1886{
1887 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1888 (__v16si) __W,
1889 (__mmask16) __U);
1890}
1891
1892extern __inline __m512i
1893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1894_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
1895{
1896 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1897 (__v16si)
1898 _mm512_setzero_si512 (),
1899 (__mmask16) __U);
1900}
1901
1902extern __inline __m512i
1903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1904_mm512_cvtepu16_epi64 (__m128i __A)
1905{
1906 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1907 (__v8di)
1908 _mm512_setzero_si512 (),
1909 (__mmask8) -1);
1910}
1911
1912extern __inline __m512i
1913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1915{
1916 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1917 (__v8di) __W,
1918 (__mmask8) __U);
1919}
1920
1921extern __inline __m512i
1922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
1924{
1925 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1926 (__v8di)
1927 _mm512_setzero_si512 (),
1928 (__mmask8) __U);
1929}
1930
1931extern __inline __m512i
1932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1933_mm512_cvtepu32_epi64 (__m256i __X)
1934{
1935 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1936 (__v8di)
1937 _mm512_setzero_si512 (),
1938 (__mmask8) -1);
1939}
1940
1941extern __inline __m512i
1942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1943_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1944{
1945 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1946 (__v8di) __W,
1947 (__mmask8) __U);
1948}
1949
1950extern __inline __m512i
1951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
1953{
1954 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1955 (__v8di)
1956 _mm512_setzero_si512 (),
1957 (__mmask8) __U);
1958}
1959
1960#ifdef __OPTIMIZE__
1961extern __inline __m512d
1962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1963_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
1964{
1965 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1966 (__v8df) __B,
1967 (__v8df)
1968 _mm512_setzero_pd (),
1969 (__mmask8) -1, __R);
1970}
1971
1972extern __inline __m512d
1973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1974_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1975 __m512d __B, const int __R)
1976{
1977 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1978 (__v8df) __B,
1979 (__v8df) __W,
1980 (__mmask8) __U, __R);
1981}
1982
1983extern __inline __m512d
1984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1985_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
1986 const int __R)
1987{
1988 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1989 (__v8df) __B,
1990 (__v8df)
1991 _mm512_setzero_pd (),
1992 (__mmask8) __U, __R);
1993}
1994
1995extern __inline __m512
1996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1997_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
1998{
1999 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2000 (__v16sf) __B,
2001 (__v16sf)
2002 _mm512_setzero_ps (),
2003 (__mmask16) -1, __R);
2004}
2005
2006extern __inline __m512
2007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2009 __m512 __B, const int __R)
2010{
2011 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2012 (__v16sf) __B,
2013 (__v16sf) __W,
2014 (__mmask16) __U, __R);
2015}
2016
2017extern __inline __m512
2018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2020{
2021 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2022 (__v16sf) __B,
2023 (__v16sf)
2024 _mm512_setzero_ps (),
2025 (__mmask16) __U, __R);
2026}
2027
2028extern __inline __m512d
2029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2030_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2031{
2032 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2033 (__v8df) __B,
2034 (__v8df)
2035 _mm512_setzero_pd (),
2036 (__mmask8) -1, __R);
2037}
2038
2039extern __inline __m512d
2040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2041_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2042 __m512d __B, const int __R)
2043{
2044 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2045 (__v8df) __B,
2046 (__v8df) __W,
2047 (__mmask8) __U, __R);
2048}
2049
2050extern __inline __m512d
2051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2052_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2053 const int __R)
2054{
2055 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2056 (__v8df) __B,
2057 (__v8df)
2058 _mm512_setzero_pd (),
2059 (__mmask8) __U, __R);
2060}
2061
2062extern __inline __m512
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2065{
2066 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2067 (__v16sf) __B,
2068 (__v16sf)
2069 _mm512_setzero_ps (),
2070 (__mmask16) -1, __R);
2071}
2072
2073extern __inline __m512
2074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2076 __m512 __B, const int __R)
2077{
2078 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2079 (__v16sf) __B,
2080 (__v16sf) __W,
2081 (__mmask16) __U, __R);
2082}
2083
2084extern __inline __m512
2085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2087{
2088 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2089 (__v16sf) __B,
2090 (__v16sf)
2091 _mm512_setzero_ps (),
2092 (__mmask16) __U, __R);
2093}
2094#else
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (C)))
2097
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), (W), (U), (C)))
2100
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2103
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (C)))
2106
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), (W), (U), (C)))
2109
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2112
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (C)))
2115
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), (W), (U), (C)))
2118
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2121
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (C)))
2124
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), (W), (U), (C)))
2127
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2130#endif
2131
2132#ifdef __OPTIMIZE__
2133extern __inline __m512d
2134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2136{
2137 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
2140 _mm512_setzero_pd (),
2141 (__mmask8) -1, __R);
2142}
2143
2144extern __inline __m512d
2145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2148{
2149 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2153}
2154
2155extern __inline __m512d
2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2159{
2160 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2165}
2166
2167extern __inline __m512
2168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2170{
2171 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
2174 _mm512_setzero_ps (),
2175 (__mmask16) -1, __R);
2176}
2177
2178extern __inline __m512
2179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2182{
2183 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2187}
2188
2189extern __inline __m512
2190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2192{
2193 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2198}
2199
2200extern __inline __m512d
2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2203{
2204 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2205 (__v8df) __V,
2206 (__v8df)
2207 _mm512_setzero_pd (),
2208 (__mmask8) -1, __R);
2209}
2210
2211extern __inline __m512d
2212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2213_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2214 __m512d __V, const int __R)
2215{
2216 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2217 (__v8df) __V,
2218 (__v8df) __W,
2219 (__mmask8) __U, __R);
2220}
2221
2222extern __inline __m512d
2223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2225 const int __R)
2226{
2227 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2228 (__v8df) __V,
2229 (__v8df)
2230 _mm512_setzero_pd (),
2231 (__mmask8) __U, __R);
2232}
2233
2234extern __inline __m512
2235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2236_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2237{
2238 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2239 (__v16sf) __B,
2240 (__v16sf)
2241 _mm512_setzero_ps (),
2242 (__mmask16) -1, __R);
2243}
2244
2245extern __inline __m512
2246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2247_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2248 __m512 __B, const int __R)
2249{
2250 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2251 (__v16sf) __B,
2252 (__v16sf) __W,
2253 (__mmask16) __U, __R);
2254}
2255
2256extern __inline __m512
2257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2258_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2259{
2260 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2261 (__v16sf) __B,
2262 (__v16sf)
2263 _mm512_setzero_ps (),
2264 (__mmask16) __U, __R);
2265}
2266
075691af
AI
2267extern __inline __m128d
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2270{
2271 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2272 (__v2df) __B,
2273 __R);
2274}
2275
2276extern __inline __m128
2277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2279{
2280 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2281 (__v4sf) __B,
2282 __R);
2283}
2284
2285extern __inline __m128d
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2288{
2289 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2290 (__v2df) __B,
2291 __R);
2292}
2293
2294extern __inline __m128
2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2297{
2298 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2299 (__v4sf) __B,
2300 __R);
2301}
2302
756c5857
AI
2303#else
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (C)))
2306
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))
2309
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2312
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (C)))
2315
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))
2318
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
2321
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (C)))
2324
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))
2327
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2330
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (C)))
2333
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))
2336
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
2339
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))
2342
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))
2345
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))
2348
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))
756c5857
AI
2351#endif
2352
2353#ifdef __OPTIMIZE__
2354extern __inline __m512d
2355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2357{
2358 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2359 (__v8df) __B,
2360 (__v8df)
2361 _mm512_setzero_pd (),
2362 (__mmask8) -1, __R);
2363}
2364
2365extern __inline __m512d
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2368 __m512d __B, const int __R)
2369{
2370 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2371 (__v8df) __B,
2372 (__v8df) __W,
2373 (__mmask8) __U, __R);
2374}
2375
2376extern __inline __m512d
2377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2378_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2379 const int __R)
2380{
2381 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2382 (__v8df) __B,
2383 (__v8df)
2384 _mm512_setzero_pd (),
2385 (__mmask8) __U, __R);
2386}
2387
2388extern __inline __m512
2389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2391{
2392 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2393 (__v16sf) __B,
2394 (__v16sf)
2395 _mm512_setzero_ps (),
2396 (__mmask16) -1, __R);
2397}
2398
2399extern __inline __m512
2400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2402 __m512 __B, const int __R)
2403{
2404 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2405 (__v16sf) __B,
2406 (__v16sf) __W,
2407 (__mmask16) __U, __R);
2408}
2409
2410extern __inline __m512
2411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2412_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2413{
2414 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2415 (__v16sf) __B,
2416 (__v16sf)
2417 _mm512_setzero_ps (),
2418 (__mmask16) __U, __R);
2419}
2420
2421extern __inline __m512d
2422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2423_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2424{
2425 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2426 (__v8df) __B,
2427 (__v8df)
2428 _mm512_setzero_pd (),
2429 (__mmask8) -1, __R);
2430}
2431
2432extern __inline __m512d
2433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2435 __m512d __B, const int __R)
2436{
2437 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2438 (__v8df) __B,
2439 (__v8df) __W,
2440 (__mmask8) __U, __R);
2441}
2442
2443extern __inline __m512d
2444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2446 const int __R)
2447{
2448 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2449 (__v8df) __B,
2450 (__v8df)
2451 _mm512_setzero_pd (),
2452 (__mmask8) __U, __R);
2453}
2454
2455extern __inline __m512
2456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2457_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2458{
2459 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2460 (__v16sf) __B,
2461 (__v16sf)
2462 _mm512_setzero_ps (),
2463 (__mmask16) -1, __R);
2464}
2465
2466extern __inline __m512
2467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2468_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2469 __m512 __B, const int __R)
2470{
2471 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2472 (__v16sf) __B,
2473 (__v16sf) __W,
2474 (__mmask16) __U, __R);
2475}
2476
2477extern __inline __m512
2478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2480{
2481 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2482 (__v16sf) __B,
2483 (__v16sf)
2484 _mm512_setzero_ps (),
2485 (__mmask16) __U, __R);
2486}
2487#else
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (R)))
2490
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))
2493
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (R)))
2496
/* Macro form used when __OPTIMIZE__ is not defined.  The zero operand is
   built with _mm512_setzero_ps (the single-precision helper), matching the
   inline definition and the other ps macros, rather than casting the result
   of _mm512_setzero_pd.  Arguments and the whole replacement list are
   parenthesized so the expansion groups as a single expression.  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (R)))
2499
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))
2502
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (R)))
2505
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (R)))
2508
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))
2511
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (R)))
2514
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (R)))
2517
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))
2520
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (R)))
2523#endif
2524
2525#ifdef __OPTIMIZE__
2526extern __inline __m512d
2527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2529{
2530 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
2533 _mm512_setzero_pd (),
2534 (__mmask8) -1, __R);
2535}
2536
2537extern __inline __m512d
2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2541{
2542 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2546}
2547
2548extern __inline __m512d
2549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2552{
2553 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2558}
2559
2560extern __inline __m512
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2563{
2564 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
2567 _mm512_setzero_ps (),
2568 (__mmask16) -1, __R);
2569}
2570
2571extern __inline __m512
2572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2575{
2576 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2580}
2581
2582extern __inline __m512
2583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2585 const int __R)
2586{
2587 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2588 (__v16sf) __B,
2589 (__v16sf)
2590 _mm512_setzero_ps (),
2591 (__mmask16) __U, __R);
2592}
2593
075691af
AI
2594extern __inline __m128d
2595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2597{
2598 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2599 (__v2df) __B,
2600 __R);
2601}
2602
2603extern __inline __m128
2604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2606{
2607 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2608 (__v4sf) __B,
2609 __R);
2610}
756c5857
AI
2611#else
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), -1, (C)))
2614
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))
2617
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
2620
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), -1, (C)))
2623
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))
2626
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
2629
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_round ((A), (B), (C)))
2632
/* Macro form used when __OPTIMIZE__ is not defined.  Arguments and the
   whole replacement list are parenthesized so the expansion groups as a
   single expression.  */
#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_scalefss_round ((A), (B), (C)))
756c5857
AI
2635#endif
2636
2637#ifdef __OPTIMIZE__
2638extern __inline __m512d
2639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2640_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2641{
2642 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2643 (__v8df) __B,
2644 (__v8df) __C,
2645 (__mmask8) -1, __R);
2646}
2647
2648extern __inline __m512d
2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2651 __m512d __C, const int __R)
2652{
2653 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2654 (__v8df) __B,
2655 (__v8df) __C,
2656 (__mmask8) __U, __R);
2657}
2658
2659extern __inline __m512d
2660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2661_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2662 __mmask8 __U, const int __R)
2663{
2664 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2665 (__v8df) __B,
2666 (__v8df) __C,
2667 (__mmask8) __U, __R);
2668}
2669
2670extern __inline __m512d
2671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2672_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2673 __m512d __C, const int __R)
2674{
2675 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2676 (__v8df) __B,
2677 (__v8df) __C,
2678 (__mmask8) __U, __R);
2679}
2680
2681extern __inline __m512
2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2684{
2685 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2686 (__v16sf) __B,
2687 (__v16sf) __C,
2688 (__mmask16) -1, __R);
2689}
2690
2691extern __inline __m512
2692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2693_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2694 __m512 __C, const int __R)
2695{
2696 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2697 (__v16sf) __B,
2698 (__v16sf) __C,
2699 (__mmask16) __U, __R);
2700}
2701
2702extern __inline __m512
2703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2704_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2705 __mmask16 __U, const int __R)
2706{
2707 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2708 (__v16sf) __B,
2709 (__v16sf) __C,
2710 (__mmask16) __U, __R);
2711}
2712
2713extern __inline __m512
2714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2716 __m512 __C, const int __R)
2717{
2718 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2719 (__v16sf) __B,
2720 (__v16sf) __C,
2721 (__mmask16) __U, __R);
2722}
2723
2724extern __inline __m512d
2725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2727{
2728 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2729 (__v8df) __B,
2730 -(__v8df) __C,
2731 (__mmask8) -1, __R);
2732}
2733
2734extern __inline __m512d
2735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2736_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2737 __m512d __C, const int __R)
2738{
2739 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2740 (__v8df) __B,
2741 -(__v8df) __C,
2742 (__mmask8) __U, __R);
2743}
2744
2745extern __inline __m512d
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2748 __mmask8 __U, const int __R)
2749{
2750 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2751 (__v8df) __B,
2752 (__v8df) __C,
2753 (__mmask8) __U, __R);
2754}
2755
2756extern __inline __m512d
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2759 __m512d __C, const int __R)
2760{
2761 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2762 (__v8df) __B,
2763 -(__v8df) __C,
2764 (__mmask8) __U, __R);
2765}
2766
2767extern __inline __m512
2768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2770{
2771 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2772 (__v16sf) __B,
2773 -(__v16sf) __C,
2774 (__mmask16) -1, __R);
2775}
2776
2777extern __inline __m512
2778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2779_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2780 __m512 __C, const int __R)
2781{
2782 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2783 (__v16sf) __B,
2784 -(__v16sf) __C,
2785 (__mmask16) __U, __R);
2786}
2787
2788extern __inline __m512
2789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2791 __mmask16 __U, const int __R)
2792{
2793 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2794 (__v16sf) __B,
2795 (__v16sf) __C,
2796 (__mmask16) __U, __R);
2797}
2798
2799extern __inline __m512
2800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2801_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2802 __m512 __C, const int __R)
2803{
2804 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2805 (__v16sf) __B,
2806 -(__v16sf) __C,
2807 (__mmask16) __U, __R);
2808}
2809
2810extern __inline __m512d
2811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2812_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2813{
2814 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2815 (__v8df) __B,
2816 (__v8df) __C,
2817 (__mmask8) -1, __R);
2818}
2819
2820extern __inline __m512d
2821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2823 __m512d __C, const int __R)
2824{
2825 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2826 (__v8df) __B,
2827 (__v8df) __C,
2828 (__mmask8) __U, __R);
2829}
2830
2831extern __inline __m512d
2832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2834 __mmask8 __U, const int __R)
2835{
2836 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2837 (__v8df) __B,
2838 (__v8df) __C,
2839 (__mmask8) __U, __R);
2840}
2841
2842extern __inline __m512d
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2845 __m512d __C, const int __R)
2846{
2847 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2848 (__v8df) __B,
2849 (__v8df) __C,
2850 (__mmask8) __U, __R);
2851}
2852
2853extern __inline __m512
2854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2855_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2856{
2857 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2858 (__v16sf) __B,
2859 (__v16sf) __C,
2860 (__mmask16) -1, __R);
2861}
2862
2863extern __inline __m512
2864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2866 __m512 __C, const int __R)
2867{
2868 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2869 (__v16sf) __B,
2870 (__v16sf) __C,
2871 (__mmask16) __U, __R);
2872}
2873
2874extern __inline __m512
2875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2877 __mmask16 __U, const int __R)
2878{
2879 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2880 (__v16sf) __B,
2881 (__v16sf) __C,
2882 (__mmask16) __U, __R);
2883}
2884
2885extern __inline __m512
2886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2887_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2888 __m512 __C, const int __R)
2889{
2890 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2891 (__v16sf) __B,
2892 (__v16sf) __C,
2893 (__mmask16) __U, __R);
2894}
2895
2896extern __inline __m512d
2897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2898_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2899{
2900 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2901 (__v8df) __B,
2902 -(__v8df) __C,
2903 (__mmask8) -1, __R);
2904}
2905
2906extern __inline __m512d
2907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2908_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2909 __m512d __C, const int __R)
2910{
2911 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2912 (__v8df) __B,
2913 -(__v8df) __C,
2914 (__mmask8) __U, __R);
2915}
2916
2917extern __inline __m512d
2918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2920 __mmask8 __U, const int __R)
2921{
2922 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2923 (__v8df) __B,
2924 (__v8df) __C,
2925 (__mmask8) __U, __R);
2926}
2927
2928extern __inline __m512d
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2931 __m512d __C, const int __R)
2932{
2933 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2934 (__v8df) __B,
2935 -(__v8df) __C,
2936 (__mmask8) __U, __R);
2937}
2938
2939extern __inline __m512
2940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2941_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2942{
2943 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2944 (__v16sf) __B,
2945 -(__v16sf) __C,
2946 (__mmask16) -1, __R);
2947}
2948
2949extern __inline __m512
2950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2951_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2952 __m512 __C, const int __R)
2953{
2954 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2955 (__v16sf) __B,
2956 -(__v16sf) __C,
2957 (__mmask16) __U, __R);
2958}
2959
2960extern __inline __m512
2961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2962_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2963 __mmask16 __U, const int __R)
2964{
2965 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2966 (__v16sf) __B,
2967 (__v16sf) __C,
2968 (__mmask16) __U, __R);
2969}
2970
2971extern __inline __m512
2972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2973_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2974 __m512 __C, const int __R)
2975{
2976 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2977 (__v16sf) __B,
2978 -(__v16sf) __C,
2979 (__mmask16) __U, __R);
2980}
2981
2982extern __inline __m512d
2983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2984_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2985{
2986 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2987 (__v8df) __B,
2988 (__v8df) __C,
2989 (__mmask8) -1, __R);
2990}
2991
2992extern __inline __m512d
2993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2995 __m512d __C, const int __R)
2996{
2997 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2998 (__v8df) __B,
2999 (__v8df) __C,
3000 (__mmask8) __U, __R);
3001}
3002
3003extern __inline __m512d
3004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3006 __mmask8 __U, const int __R)
3007{
3008 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3009 (__v8df) __B,
3010 (__v8df) __C,
3011 (__mmask8) __U, __R);
3012}
3013
3014extern __inline __m512d
3015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3017 __m512d __C, const int __R)
3018{
3019 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3020 (__v8df) __B,
3021 (__v8df) __C,
3022 (__mmask8) __U, __R);
3023}
3024
3025extern __inline __m512
3026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3027_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3028{
3029 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3030 (__v16sf) __B,
3031 (__v16sf) __C,
3032 (__mmask16) -1, __R);
3033}
3034
3035extern __inline __m512
3036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3038 __m512 __C, const int __R)
3039{
3040 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3041 (__v16sf) __B,
3042 (__v16sf) __C,
3043 (__mmask16) __U, __R);
3044}
3045
3046extern __inline __m512
3047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3049 __mmask16 __U, const int __R)
3050{
3051 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3052 (__v16sf) __B,
3053 (__v16sf) __C,
3054 (__mmask16) __U, __R);
3055}
3056
3057extern __inline __m512
3058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3059_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3060 __m512 __C, const int __R)
3061{
3062 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3063 (__v16sf) __B,
3064 (__v16sf) __C,
3065 (__mmask16) __U, __R);
3066}
3067
3068extern __inline __m512d
3069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3071{
3072 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3073 (__v8df) __B,
3074 -(__v8df) __C,
3075 (__mmask8) -1, __R);
3076}
3077
3078extern __inline __m512d
3079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3081 __m512d __C, const int __R)
3082{
3083 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3084 (__v8df) __B,
3085 (__v8df) __C,
3086 (__mmask8) __U, __R);
3087}
3088
3089extern __inline __m512d
3090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3092 __mmask8 __U, const int __R)
3093{
3094 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3095 (__v8df) __B,
3096 (__v8df) __C,
3097 (__mmask8) __U, __R);
3098}
3099
3100extern __inline __m512d
3101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3103 __m512d __C, const int __R)
3104{
3105 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3106 (__v8df) __B,
3107 -(__v8df) __C,
3108 (__mmask8) __U, __R);
3109}
3110
3111extern __inline __m512
3112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3114{
3115 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3116 (__v16sf) __B,
3117 -(__v16sf) __C,
3118 (__mmask16) -1, __R);
3119}
3120
3121extern __inline __m512
3122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3124 __m512 __C, const int __R)
3125{
3126 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3127 (__v16sf) __B,
3128 (__v16sf) __C,
3129 (__mmask16) __U, __R);
3130}
3131
3132extern __inline __m512
3133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3135 __mmask16 __U, const int __R)
3136{
3137 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3138 (__v16sf) __B,
3139 (__v16sf) __C,
3140 (__mmask16) __U, __R);
3141}
3142
3143extern __inline __m512
3144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3145_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3146 __m512 __C, const int __R)
3147{
3148 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3149 (__v16sf) __B,
3150 -(__v16sf) __C,
3151 (__mmask16) __U, __R);
3152}
3153#else
/* Macro forms of the fmadd rounding intrinsics (pd and ps).  Every
   expansion is enclosed in parentheses so that the cast applies to the
   whole builtin call even when the macro result is followed by a
   postfix operator such as a subscript.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
3177
/* Macro forms of the fmsub rounding intrinsics (pd and ps).  The
   non-mask3 forms negate C and reuse the fmadd builtins; the mask3 forms
   call the dedicated vfmsub*_mask3 builtins with C unchanged.  Every
   expansion is enclosed in parentheses so that the cast applies to the
   whole builtin call regardless of surrounding operators.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R))
3201
/* Macro form: calls the vfmaddsubpd512 builtin with an all-ones mask.
   The expansion is parenthesized so the cast covers the whole call.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))
3204
/* Macro form of the masked fmaddsub intrinsic.  It must expand to the
   vfmaddsub builtin (the previous expansion called the plain vfmadd
   builtin), in line with _mm512_fmaddsub_round_pd and with
   _mm512_mask_fmaddsub_round_ps.  The expansion is parenthesized so the
   cast covers the whole call.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))
3207
/* Remaining macro forms of the fmaddsub rounding intrinsics.  Every
   expansion is enclosed in parentheses so that the cast applies to the
   whole builtin call regardless of surrounding operators.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3225
/* Macro forms of the fmsubadd rounding intrinsics (pd and ps).  The
   non-mask3 forms negate C and reuse the fmaddsub builtins; the mask3
   forms call the dedicated vfmsubadd*_mask3 builtins with C unchanged.
   Every expansion is enclosed in parentheses so that the cast applies to
   the whole builtin call regardless of surrounding operators.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3249
/* Macro form: negates A and calls the vfmaddpd512 builtin with an
   all-ones mask.  The expansion is parenthesized so the cast covers the
   whole call.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R))
3252
/* Macro form of the masked fnmadd intrinsic.  A is handed to the
   vfnmadd builtin unmodified, exactly as the inline definition of this
   intrinsic does; the previous expansion additionally passed -(A), which
   disagreed with the inline form.  The expansion is parenthesized so the
   cast covers the whole call.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))
3255
/* Macro forms that negate A and reuse the fmadd builtins.  Every
   expansion is enclosed in parentheses so that the cast applies to the
   whole builtin call regardless of surrounding operators.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R))
3264
/* Macro form of the masked fnmadd intrinsic (single precision).  A is
   handed to the vfnmadd builtin unmodified, exactly as the inline
   definition of this intrinsic does; the previous expansion additionally
   passed -(A), which disagreed with the inline form.  The expansion is
   parenthesized so the cast covers the whole call.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))
3267
/* Macro forms that negate A and reuse the single-precision fmadd
   builtins.  Every expansion is enclosed in parentheses so that the cast
   applies to the whole builtin call regardless of surrounding
   operators.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R))
3273
/* Macro forms of the fnmsub rounding intrinsics (pd and ps).  The
   unmasked and maskz forms negate both A and C and reuse the fmadd
   builtins; the mask and mask3 forms call the dedicated vfnmsub builtins
   with unmodified operands.  Every expansion is enclosed in parentheses
   so that the cast applies to the whole builtin call regardless of
   surrounding operators.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R))
3297#endif
3298
3299extern __inline __m512i
3300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3301_mm512_abs_epi64 (__m512i __A)
3302{
3303 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3304 (__v8di)
3305 _mm512_setzero_si512 (),
3306 (__mmask8) -1);
3307}
3308
3309extern __inline __m512i
3310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3312{
3313 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3314 (__v8di) __W,
3315 (__mmask8) __U);
3316}
3317
3318extern __inline __m512i
3319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3321{
3322 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3323 (__v8di)
3324 _mm512_setzero_si512 (),
3325 (__mmask8) __U);
3326}
3327
3328extern __inline __m512i
3329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3330_mm512_abs_epi32 (__m512i __A)
3331{
3332 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3333 (__v16si)
3334 _mm512_setzero_si512 (),
3335 (__mmask16) -1);
3336}
3337
3338extern __inline __m512i
3339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3340_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3341{
3342 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3343 (__v16si) __W,
3344 (__mmask16) __U);
3345}
3346
3347extern __inline __m512i
3348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3350{
3351 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3352 (__v16si)
3353 _mm512_setzero_si512 (),
3354 (__mmask16) __U);
3355}
3356
3357extern __inline __m512
3358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3359_mm512_broadcastss_ps (__m128 __A)
3360{
3361 __v16sf __O;
3362 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
3363 (__mmask16) -1);
3364}
3365
3366extern __inline __m512
3367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3368_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3369{
3370 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3371 (__v16sf) __O, __M);
3372}
3373
3374extern __inline __m512
3375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3376_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3377{
3378 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3379 (__v16sf)
3380 _mm512_setzero_ps (),
3381 __M);
3382}
3383
3384extern __inline __m512d
3385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3386_mm512_broadcastsd_pd (__m128d __A)
3387{
3388 __v8df __O;
3389 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
3390 (__mmask8) -1);
3391}
3392
3393extern __inline __m512d
3394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3396{
3397 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3398 (__v8df) __O, __M);
3399}
3400
3401extern __inline __m512d
3402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3403_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3404{
3405 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3406 (__v8df)
3407 _mm512_setzero_pd (),
3408 __M);
3409}
3410
3411extern __inline __m512i
3412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3413_mm512_broadcastd_epi32 (__m128i __A)
3414{
3415 __v16si __O;
3416 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
3417 (__mmask16) -1);
3418}
3419
3420extern __inline __m512i
3421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3422_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3423{
3424 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3425 (__v16si) __O, __M);
3426}
3427
3428extern __inline __m512i
3429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3430_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3431{
3432 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3433 (__v16si)
3434 _mm512_setzero_si512 (),
3435 __M);
3436}
3437
3438extern __inline __m512i
3439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3440_mm512_set1_epi32 (int __A)
3441{
3442 __v16si __O;
3443 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
3444 (__mmask16)(-1));
3445}
3446
3447extern __inline __m512i
3448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3450{
3451 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3452 __M);
3453}
3454
3455extern __inline __m512i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3458{
3459 return (__m512i)
3460 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3461 (__v16si) _mm512_setzero_si512 (),
3462 __M);
3463}
3464
3465extern __inline __m512i
3466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3467_mm512_broadcastq_epi64 (__m128i __A)
3468{
3469 __v8di __O;
3470 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
3471 (__mmask8) -1);
3472}
3473
3474extern __inline __m512i
3475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3476_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3477{
3478 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3479 (__v8di) __O, __M);
3480}
3481
3482extern __inline __m512i
3483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3484_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3485{
3486 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3487 (__v8di)
3488 _mm512_setzero_si512 (),
3489 __M);
3490}
3491
3492extern __inline __m512i
3493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494_mm512_set1_epi64 (long long __A)
3495{
3496 __v8di __O;
3497#ifdef TARGET_64BIT
3498 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
3499 (__mmask8)(-1));
3500#else
3501 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
3502 (__mmask8)(-1));
3503#endif
3504}
3505
3506extern __inline __m512i
3507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3509{
3510#ifdef TARGET_64BIT
3511 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3512 __M);
3513#else
3514 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
3515 __M);
3516#endif
3517}
3518
3519extern __inline __m512i
3520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3522{
3523#ifdef TARGET_64BIT
3524 return (__m512i)
3525 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3526 (__v8di) _mm512_setzero_si512 (),
3527 __M);
3528#else
3529 return (__m512i)
3530 __builtin_ia32_pbroadcastq512_mem_mask (__A,
3531 (__v8di) _mm512_setzero_si512 (),
3532 __M);
3533#endif
3534}
3535
3536extern __inline __m512
3537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538_mm512_broadcast_f32x4 (__m128 __A)
3539{
3540 __v16sf __O;
3541 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
3542 (__mmask16) -1);
3543}
3544
3545extern __inline __m512
3546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3547_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3548{
3549 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3550 (__v16sf) __O,
3551 __M);
3552}
3553
3554extern __inline __m512
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3557{
3558 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3559 (__v16sf)
3560 _mm512_setzero_ps (),
3561 __M);
3562}
3563
3564extern __inline __m512i
3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566_mm512_broadcast_i32x4 (__m128i __A)
3567{
3568 __v16si __O;
3569 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3570 __O,
3571 (__mmask16) -1);
3572}
3573
3574extern __inline __m512i
3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3577{
3578 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3579 (__v16si) __O,
3580 __M);
3581}
3582
3583extern __inline __m512i
3584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3586{
3587 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3588 (__v16si)
3589 _mm512_setzero_si512 (),
3590 __M);
3591}
3592
3593extern __inline __m512d
3594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595_mm512_broadcast_f64x4 (__m256d __A)
3596{
3597 __v8df __O;
3598 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3599 __O,
3600 (__mmask8) -1);
3601}
3602
3603extern __inline __m512d
3604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3605_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3606{
3607 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3608 (__v8df) __O,
3609 __M);
3610}
3611
3612extern __inline __m512d
3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3615{
3616 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3617 (__v8df)
3618 _mm512_setzero_pd (),
3619 __M);
3620}
3621
3622extern __inline __m512i
3623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624_mm512_broadcast_i64x4 (__m256i __A)
3625{
3626 __v8di __O;
3627 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3628 __O,
3629 (__mmask8) -1);
3630}
3631
3632extern __inline __m512i
3633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3634_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3635{
3636 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3637 (__v8di) __O,
3638 __M);
3639}
3640
3641extern __inline __m512i
3642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3644{
3645 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3646 (__v8di)
3647 _mm512_setzero_si512 (),
3648 __M);
3649}
3650
/* Named 8-bit selector constants.  Each name carries four letters; a
   letter A, B, C or D stands for the 2-bit value 0, 1, 2 or 3, and the
   first letter occupies the two most significant bits of the constant
   (so _MM_PERM_ABCD is 0x1B and _MM_PERM_DCBA is 0xE4).  Every row below
   lists the four constants that share their first three letters.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
3740
3741#ifdef __OPTIMIZE__
3742extern __inline __m512i
3743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3745{
3746 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3747 __mask,
3748 (__v16si)
3749 _mm512_setzero_si512 (),
3750 (__mmask16) -1);
3751}
3752
3753extern __inline __m512i
3754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3756 _MM_PERM_ENUM __mask)
3757{
3758 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3759 __mask,
3760 (__v16si) __W,
3761 (__mmask16) __U);
3762}
3763
3764extern __inline __m512i
3765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3766_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3767{
3768 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3769 __mask,
3770 (__v16si)
3771 _mm512_setzero_si512 (),
3772 (__mmask16) __U);
3773}
3774
3775extern __inline __m512i
3776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3777_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3778{
3779 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3780 (__v8di) __B, __imm,
3781 (__v8di)
3782 _mm512_setzero_si512 (),
3783 (__mmask8) -1);
3784}
3785
3786extern __inline __m512i
3787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3789 __m512i __B, const int __imm)
3790{
3791 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3792 (__v8di) __B, __imm,
3793 (__v8di) __W,
3794 (__mmask8) __U);
3795}
3796
3797extern __inline __m512i
3798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3799_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3800 const int __imm)
3801{
3802 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3803 (__v8di) __B, __imm,
3804 (__v8di)
3805 _mm512_setzero_si512 (),
3806 (__mmask8) __U);
3807}
3808
3809extern __inline __m512i
3810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3811_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3812{
3813 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3814 (__v16si) __B,
3815 __imm,
3816 (__v16si)
3817 _mm512_setzero_si512 (),
3818 (__mmask16) -1);
3819}
3820
3821extern __inline __m512i
3822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3823_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3824 __m512i __B, const int __imm)
3825{
3826 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3827 (__v16si) __B,
3828 __imm,
3829 (__v16si) __W,
3830 (__mmask16) __U);
3831}
3832
3833extern __inline __m512i
3834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3835_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3836 const int __imm)
3837{
3838 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3839 (__v16si) __B,
3840 __imm,
3841 (__v16si)
3842 _mm512_setzero_si512 (),
3843 (__mmask16) __U);
3844}
3845
3846extern __inline __m512d
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3849{
3850 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3851 (__v8df) __B, __imm,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 (__mmask8) -1);
3855}
3856
3857extern __inline __m512d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3860 __m512d __B, const int __imm)
3861{
3862 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3863 (__v8df) __B, __imm,
3864 (__v8df) __W,
3865 (__mmask8) __U);
3866}
3867
3868extern __inline __m512d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3871 const int __imm)
3872{
3873 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3874 (__v8df) __B, __imm,
3875 (__v8df)
3876 _mm512_setzero_pd (),
3877 (__mmask8) __U);
3878}
3879
3880extern __inline __m512
3881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3882_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3883{
3884 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3885 (__v16sf) __B, __imm,
3886 (__v16sf)
3887 _mm512_setzero_ps (),
3888 (__mmask16) -1);
3889}
3890
3891extern __inline __m512
3892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3893_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3894 __m512 __B, const int __imm)
3895{
3896 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3897 (__v16sf) __B, __imm,
3898 (__v16sf) __W,
3899 (__mmask16) __U);
3900}
3901
3902extern __inline __m512
3903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3904_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
3905 const int __imm)
3906{
3907 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3908 (__v16sf) __B, __imm,
3909 (__v16sf)
3910 _mm512_setzero_ps (),
3911 (__mmask16) __U);
3912}
3913
3914#else
/* Macro forms of the epi32 shuffle: each expands to a call of
   __builtin_ia32_pshufd512_mask with the vector cast to __v16si and the
   selector cast to int.  */

/* Unmasked: zero vector as third operand, all-ones __mmask16.  */
#define _mm512_shuffle_epi32(V, IMM)                                    \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) -1))

/* Merge-masked: SRC as third operand, K as the mask.  */
#define _mm512_mask_shuffle_epi32(SRC, K, V, IMM)                       \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) (SRC),                                         \
     (__mmask16) (K)))

/* Zero-masked: zero vector as third operand, K as the mask.  */
#define _mm512_maskz_shuffle_epi32(K, V, IMM)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si) (__m512i) (V),                                           \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) (K)))
3929
/* Macro forms of the i64x2 shuffle: each expands to a call of
   __builtin_ia32_shuf_i64x2_mask with both vectors cast to __v8di and the
   selector cast to int.  */

/* Unmasked: zero vector as fourth operand, all-ones __mmask8.  */
#define _mm512_shuffle_i64x2(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) _mm512_setzero_si512 (),                        \
     (__mmask8) -1))

/* Merge-masked: SRC as fourth operand, K as the mask.  */
#define _mm512_mask_shuffle_i64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) (SRC),                                          \
     (__mmask8) (K)))

/* Zero-masked: zero vector as fourth operand, K as the mask.  */
#define _mm512_maskz_shuffle_i64x2(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di) (__m512i) (V1),                                           \
     (__v8di) (__m512i) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8di) (__m512i) _mm512_setzero_si512 (),                        \
     (__mmask8) (K)))
3947
/* Macro forms of the i32x4 shuffle: each expands to a call of
   __builtin_ia32_shuf_i32x4_mask with both vectors cast to __v16si and
   the selector cast to int.  */

/* Unmasked: zero vector as fourth operand, all-ones __mmask16.  */
#define _mm512_shuffle_i32x4(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) -1))

/* Merge-masked: SRC as fourth operand, K as the mask.  */
#define _mm512_mask_shuffle_i32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) (SRC),                                         \
     (__mmask16) (K)))

/* Zero-masked: zero vector as fourth operand, K as the mask.  */
#define _mm512_maskz_shuffle_i32x4(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si) (__m512i) (V1),                                          \
     (__v16si) (__m512i) (V2),                                          \
     (int) (IMM),                                                       \
     (__v16si) (__m512i) _mm512_setzero_si512 (),                       \
     (__mmask16) (K)))
3965
/* Macro forms of the f64x2 shuffle: each expands to a call of
   __builtin_ia32_shuf_f64x2_mask with both vectors cast to __v8df and the
   selector cast to int.  */

/* Unmasked: zero vector as fourth operand, all-ones __mmask8.  */
#define _mm512_shuffle_f64x2(V1, V2, IMM)                               \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) _mm512_setzero_pd (),                           \
     (__mmask8) -1))

/* Merge-masked: SRC as fourth operand, K as the mask.  */
#define _mm512_mask_shuffle_f64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) (SRC),                                          \
     (__mmask8) (K)))

/* Zero-masked: zero vector as fourth operand, K as the mask.  */
#define _mm512_maskz_shuffle_f64x2(K, V1, V2, IMM)                      \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df) (__m512d) (V1),                                           \
     (__v8df) (__m512d) (V2),                                           \
     (int) (IMM),                                                       \
     (__v8df) (__m512d) _mm512_setzero_pd (),                           \
     (__mmask8) (K)))
3983
/* Macro forms of the f32x4 shuffle: each expands to a call of
   __builtin_ia32_shuf_f32x4_mask with both vectors cast to __v16sf and
   the selector cast to int.  */

/* Unmasked: zero vector as fourth operand, all-ones __mmask16.  */
#define _mm512_shuffle_f32x4(V1, V2, IMM)                               \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) _mm512_setzero_ps (),                           \
     (__mmask16) -1))

/* Merge-masked: SRC as fourth operand, K as the mask.  */
#define _mm512_mask_shuffle_f32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) (SRC),                                          \
     (__mmask16) (K)))

/* Zero-masked: zero vector as fourth operand, K as the mask.  */
#define _mm512_maskz_shuffle_f32x4(K, V1, V2, IMM)                      \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf) (__m512) (V1),                                           \
     (__v16sf) (__m512) (V2),                                           \
     (int) (IMM),                                                       \
     (__v16sf) (__m512) _mm512_setzero_ps (),                           \
     (__mmask16) (K)))
4001#endif
4002
4003extern __inline __m512i
4004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4006{
4007 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4008 (__v16si) __B,
4009 (__v16si)
4010 _mm512_setzero_si512 (),
4011 (__mmask16) -1);
4012}
4013
4014extern __inline __m512i
4015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4016_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4017{
4018 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4019 (__v16si) __B,
4020 (__v16si) __W,
4021 (__mmask16) __U);
4022}
4023
4024extern __inline __m512i
4025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4026_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4027{
4028 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4029 (__v16si) __B,
4030 (__v16si)
4031 _mm512_setzero_si512 (),
4032 (__mmask16) __U);
4033}
4034
4035extern __inline __m512i
4036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4037_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4038{
4039 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4040 (__v16si) __B,
4041 (__v16si)
4042 _mm512_setzero_si512 (),
4043 (__mmask16) -1);
4044}
4045
4046extern __inline __m512i
4047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4049{
4050 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4051 (__v16si) __B,
4052 (__v16si) __W,
4053 (__mmask16) __U);
4054}
4055
4056extern __inline __m512i
4057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4059{
4060 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4061 (__v16si) __B,
4062 (__v16si)
4063 _mm512_setzero_si512 (),
4064 (__mmask16) __U);
4065}
4066
4067extern __inline __m512i
4068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4069_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4070{
4071 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4072 (__v8di) __B,
4073 (__v8di)
4074 _mm512_setzero_si512 (),
4075 (__mmask8) -1);
4076}
4077
4078extern __inline __m512i
4079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4080_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4081{
4082 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4083 (__v8di) __B,
4084 (__v8di) __W,
4085 (__mmask8) __U);
4086}
4087
4088extern __inline __m512i
4089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4090_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4091{
4092 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4093 (__v8di) __B,
4094 (__v8di)
4095 _mm512_setzero_si512 (),
4096 (__mmask8) __U);
4097}
4098
4099extern __inline __m512i
4100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4101_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4102{
4103 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4104 (__v8di) __B,
4105 (__v8di)
4106 _mm512_setzero_si512 (),
4107 (__mmask8) -1);
4108}
4109
4110extern __inline __m512i
4111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4112_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4113{
4114 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4115 (__v8di) __B,
4116 (__v8di) __W,
4117 (__mmask8) __U);
4118}
4119
4120extern __inline __m512i
4121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4122_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4123{
4124 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4125 (__v8di) __B,
4126 (__v8di)
4127 _mm512_setzero_si512 (),
4128 (__mmask8) __U);
4129}
4130
4131#ifdef __OPTIMIZE__
4132extern __inline __m256i
4133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4135{
4136 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4137 (__v8si)
4138 _mm256_setzero_si256 (),
4139 (__mmask8) -1, __R);
4140}
4141
4142extern __inline __m256i
4143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4144_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4145 const int __R)
4146{
4147 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4148 (__v8si) __W,
4149 (__mmask8) __U, __R);
4150}
4151
4152extern __inline __m256i
4153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4154_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4155{
4156 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4157 (__v8si)
4158 _mm256_setzero_si256 (),
4159 (__mmask8) __U, __R);
4160}
4161
4162extern __inline __m256i
4163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4165{
4166 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4167 (__v8si)
4168 _mm256_setzero_si256 (),
4169 (__mmask8) -1, __R);
4170}
4171
4172extern __inline __m256i
4173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4174_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4175 const int __R)
4176{
4177 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4178 (__v8si) __W,
4179 (__mmask8) __U, __R);
4180}
4181
4182extern __inline __m256i
4183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4185{
4186 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4187 (__v8si)
4188 _mm256_setzero_si256 (),
4189 (__mmask8) __U, __R);
4190}
4191#else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvttpd2dq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvtt_roundpd_epi32(V, R)                                 \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), -1, R))

#define _mm512_mask_cvtt_roundpd_epi32(SRC, K, V, R)                    \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (                         \
     V, (__v8si) (SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epi32(K, V, R)                        \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4200
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvttpd2udq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvtt_roundpd_epu32(V, R)                                 \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (                        \
     V, (__v8si) _mm256_setzero_si256 (), -1, R))

#define _mm512_mask_cvtt_roundpd_epu32(SRC, K, V, R)                    \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (                        \
     V, (__v8si) (SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epu32(K, V, R)                        \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask (                        \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4209#endif
4210
4211#ifdef __OPTIMIZE__
4212extern __inline __m256i
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4215{
4216 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4217 (__v8si)
4218 _mm256_setzero_si256 (),
4219 (__mmask8) -1, __R);
4220}
4221
4222extern __inline __m256i
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4225 const int __R)
4226{
4227 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4228 (__v8si) __W,
4229 (__mmask8) __U, __R);
4230}
4231
4232extern __inline __m256i
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4235{
4236 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4237 (__v8si)
4238 _mm256_setzero_si256 (),
4239 (__mmask8) __U, __R);
4240}
4241
4242extern __inline __m256i
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4245{
4246 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4247 (__v8si)
4248 _mm256_setzero_si256 (),
4249 (__mmask8) -1, __R);
4250}
4251
4252extern __inline __m256i
4253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4255 const int __R)
4256{
4257 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4258 (__v8si) __W,
4259 (__mmask8) __U, __R);
4260}
4261
4262extern __inline __m256i
4263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4264_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4265{
4266 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4267 (__v8si)
4268 _mm256_setzero_si256 (),
4269 (__mmask8) __U, __R);
4270}
4271#else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvtpd2dq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvt_roundpd_epi32(V, R)                                  \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (                          \
     V, (__v8si) _mm256_setzero_si256 (), -1, R))

#define _mm512_mask_cvt_roundpd_epi32(SRC, K, V, R)                     \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (                          \
     V, (__v8si) (SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epi32(K, V, R)                         \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask (                          \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4280
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvtpd2udq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvt_roundpd_epu32(V, R)                                  \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), -1, R))

#define _mm512_mask_cvt_roundpd_epu32(SRC, K, V, R)                     \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (                         \
     V, (__v8si) (SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epu32(K, V, R)                         \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask (                         \
     V, (__v8si) _mm256_setzero_si256 (), K, R))
4289#endif
4290
4291#ifdef __OPTIMIZE__
4292extern __inline __m512i
4293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4295{
4296 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4297 (__v16si)
4298 _mm512_setzero_si512 (),
4299 (__mmask16) -1, __R);
4300}
4301
4302extern __inline __m512i
4303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4305 const int __R)
4306{
4307 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4308 (__v16si) __W,
4309 (__mmask16) __U, __R);
4310}
4311
4312extern __inline __m512i
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4315{
4316 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4317 (__v16si)
4318 _mm512_setzero_si512 (),
4319 (__mmask16) __U, __R);
4320}
4321
4322extern __inline __m512i
4323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4324_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4325{
4326 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4327 (__v16si)
4328 _mm512_setzero_si512 (),
4329 (__mmask16) -1, __R);
4330}
4331
4332extern __inline __m512i
4333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4334_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4335 const int __R)
4336{
4337 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4338 (__v16si) __W,
4339 (__mmask16) __U, __R);
4340}
4341
4342extern __inline __m512i
4343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4345{
4346 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4347 (__v16si)
4348 _mm512_setzero_si512 (),
4349 (__mmask16) __U, __R);
4350}
4351#else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvttps2dq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvtt_roundps_epi32(V, R)                                 \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epi32(SRC, K, V, R)                    \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (                         \
     V, (__v16si) (SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epi32(K, V, R)                        \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4360
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvttps2udq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvtt_roundps_epu32(V, R)                                 \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (                        \
     V, (__v16si) _mm512_setzero_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epu32(SRC, K, V, R)                    \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (                        \
     V, (__v16si) (SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epu32(K, V, R)                        \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask (                        \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4369#endif
4370
4371#ifdef __OPTIMIZE__
4372extern __inline __m512i
4373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4375{
4376 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4377 (__v16si)
4378 _mm512_setzero_si512 (),
4379 (__mmask16) -1, __R);
4380}
4381
4382extern __inline __m512i
4383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4385 const int __R)
4386{
4387 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4388 (__v16si) __W,
4389 (__mmask16) __U, __R);
4390}
4391
4392extern __inline __m512i
4393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4394_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4395{
4396 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4397 (__v16si)
4398 _mm512_setzero_si512 (),
4399 (__mmask16) __U, __R);
4400}
4401
4402extern __inline __m512i
4403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4404_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4405{
4406 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4407 (__v16si)
4408 _mm512_setzero_si512 (),
4409 (__mmask16) -1, __R);
4410}
4411
4412extern __inline __m512i
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4415 const int __R)
4416{
4417 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4418 (__v16si) __W,
4419 (__mmask16) __U, __R);
4420}
4421
4422extern __inline __m512i
4423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4425{
4426 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4427 (__v16si)
4428 _mm512_setzero_si512 (),
4429 (__mmask16) __U, __R);
4430}
4431#else
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvtps2dq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvt_roundps_epi32(V, R)                                  \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (                          \
     V, (__v16si) _mm512_setzero_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epi32(SRC, K, V, R)                     \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (                          \
     V, (__v16si) (SRC), K, R))

#define _mm512_maskz_cvt_roundps_epi32(K, V, R)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask (                          \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4440
/* Macro forms used when __OPTIMIZE__ is not defined; each expands to a
   call of __builtin_ia32_cvtps2udq512_mask with the arguments passed
   through as written.  */
#define _mm512_cvt_roundps_epu32(V, R)                                  \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epu32(SRC, K, V, R)                     \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (                         \
     V, (__v16si) (SRC), K, R))

#define _mm512_maskz_cvt_roundps_epu32(K, V, R)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask (                         \
     V, (__v16si) _mm512_setzero_si512 (), K, R))
4449#endif
4450
4451extern __inline __m128d
4452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453_mm_cvtu32_sd (__m128d __A, unsigned __B)
4454{
4455 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4456}
4457
4458#ifdef __x86_64__
4459#ifdef __OPTIMIZE__
4460extern __inline __m128d
4461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4462_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4463{
4464 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4465}
4466
4467extern __inline __m128d
4468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4470{
4471 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4472}
4473
4474extern __inline __m128d
4475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4476_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4477{
4478 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4479}
4480#else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128d
   operand, B the 64-bit integer and C the rounding argument; all three
   are handed to the builtin as written.

   The whole expansion is parenthesized so that the macro behaves as a
   single primary expression: without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would
   bind to the builtin call before the __m128d cast is applied.  */
#define _mm_cvt_roundu64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundi64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_sd(A, B, C)                                   \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
4489#endif
4490
4491#endif
4492
4493#ifdef __OPTIMIZE__
4494extern __inline __m128
4495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4496_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4497{
4498 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4499}
4500
4501extern __inline __m128
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4504{
4505 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4506}
4507
4508extern __inline __m128
4509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4510_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4511{
4512 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4513}
4514#else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128
   operand, B the 32-bit integer and C the rounding argument; all three
   are handed to the builtin as written.

   The whole expansion is parenthesized so that the macro behaves as a
   single primary expression: without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would
   bind to the builtin call before the __m128 cast is applied.  */
#define _mm_cvt_roundu32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundi32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundsi32_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
4523#endif
4524
4525#ifdef __x86_64__
4526#ifdef __OPTIMIZE__
4527extern __inline __m128
4528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4529_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4530{
4531 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4532}
4533
4534extern __inline __m128
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4537{
4538 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4539}
4540
4541extern __inline __m128
4542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4543_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4544{
4545 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4546}
4547#else
/* Macro forms used when __OPTIMIZE__ is not defined.  A is the __m128
   operand, B the 64-bit integer and C the rounding argument; all three
   are handed to the builtin as written.

   The whole expansion is parenthesized so that the macro behaves as a
   single primary expression: without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would
   bind to the builtin call before the __m128 cast is applied.  */
#define _mm_cvt_roundu64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))
4556#endif
4557
4558#endif
4559
4560extern __inline __m128i
4561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4562_mm512_cvtepi32_epi8 (__m512i __A)
4563{
4564 __v16qi __O;
4565 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O,
4566 (__mmask16) -1);
4567}
4568
4569extern __inline __m128i
4570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4572{
4573 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4574 (__v16qi) __O, __M);
4575}
4576
4577extern __inline __m128i
4578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4579_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4580{
4581 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4582 (__v16qi)
4583 _mm_setzero_si128 (),
4584 __M);
4585}
4586
4587extern __inline __m128i
4588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4589_mm512_cvtsepi32_epi8 (__m512i __A)
4590{
4591 __v16qi __O;
4592 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O,
4593 (__mmask16) -1);
4594}
4595
4596extern __inline __m128i
4597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4599{
4600 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4601 (__v16qi) __O, __M);
4602}
4603
4604extern __inline __m128i
4605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4606_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4607{
4608 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4609 (__v16qi)
4610 _mm_setzero_si128 (),
4611 __M);
4612}
4613
4614extern __inline __m128i
4615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4616_mm512_cvtusepi32_epi8 (__m512i __A)
4617{
4618 __v16qi __O;
4619 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O,
4620 (__mmask16) -1);
4621}
4622
4623extern __inline __m128i
4624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4625_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4626{
4627 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4628 (__v16qi) __O,
4629 __M);
4630}
4631
4632extern __inline __m128i
4633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4634_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4635{
4636 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4637 (__v16qi)
4638 _mm_setzero_si128 (),
4639 __M);
4640}
4641
4642extern __inline __m256i
4643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4644_mm512_cvtepi32_epi16 (__m512i __A)
4645{
4646 __v16hi __O;
4647 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O,
4648 (__mmask16) -1);
4649}
4650
4651extern __inline __m256i
4652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4653_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4654{
4655 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4656 (__v16hi) __O, __M);
4657}
4658
4659extern __inline __m256i
4660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4661_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4662{
4663 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4664 (__v16hi)
4665 _mm256_setzero_si256 (),
4666 __M);
4667}
4668
4669extern __inline __m256i
4670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4671_mm512_cvtsepi32_epi16 (__m512i __A)
4672{
4673 __v16hi __O;
4674 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O,
4675 (__mmask16) -1);
4676}
4677
4678extern __inline __m256i
4679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4680_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4681{
4682 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4683 (__v16hi) __O, __M);
4684}
4685
4686extern __inline __m256i
4687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4689{
4690 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4691 (__v16hi)
4692 _mm256_setzero_si256 (),
4693 __M);
4694}
4695
4696extern __inline __m256i
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm512_cvtusepi32_epi16 (__m512i __A)
4699{
4700 __v16hi __O;
4701 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O,
4702 (__mmask16) -1);
4703}
4704
4705extern __inline __m256i
4706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4707_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4708{
4709 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4710 (__v16hi) __O,
4711 __M);
4712}
4713
4714extern __inline __m256i
4715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4716_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4717{
4718 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4719 (__v16hi)
4720 _mm256_setzero_si256 (),
4721 __M);
4722}
4723
4724extern __inline __m256i
4725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726_mm512_cvtepi64_epi32 (__m512i __A)
4727{
4728 __v8si __O;
4729 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O,
4730 (__mmask8) -1);
4731}
4732
4733extern __inline __m256i
4734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4735_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4736{
4737 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4738 (__v8si) __O, __M);
4739}
4740
4741extern __inline __m256i
4742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4743_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4744{
4745 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4746 (__v8si)
4747 _mm256_setzero_si256 (),
4748 __M);
4749}
4750
4751extern __inline __m256i
4752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753_mm512_cvtsepi64_epi32 (__m512i __A)
4754{
4755 __v8si __O;
4756 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O,
4757 (__mmask8) -1);
4758}
4759
4760extern __inline __m256i
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4763{
4764 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4765 (__v8si) __O, __M);
4766}
4767
4768extern __inline __m256i
4769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4770_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4771{
4772 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4773 (__v8si)
4774 _mm256_setzero_si256 (),
4775 __M);
4776}
4777
4778extern __inline __m256i
4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780_mm512_cvtusepi64_epi32 (__m512i __A)
4781{
4782 __v8si __O;
4783 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O,
4784 (__mmask8) -1);
4785}
4786
4787extern __inline __m256i
4788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4789_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4790{
4791 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4792 (__v8si) __O, __M);
4793}
4794
4795extern __inline __m256i
4796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4797_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4798{
4799 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4800 (__v8si)
4801 _mm256_setzero_si256 (),
4802 __M);
4803}
4804
4805extern __inline __m128i
4806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4807_mm512_cvtepi64_epi16 (__m512i __A)
4808{
4809 __v8hi __O;
4810 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O,
4811 (__mmask8) -1);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4817{
4818 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4819 (__v8hi) __O, __M);
4820}
4821
4822extern __inline __m128i
4823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4824_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
4825{
4826 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4827 (__v8hi)
4828 _mm_setzero_si128 (),
4829 __M);
4830}
4831
4832extern __inline __m128i
4833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4834_mm512_cvtsepi64_epi16 (__m512i __A)
4835{
4836 __v8hi __O;
4837 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O,
4838 (__mmask8) -1);
4839}
4840
4841extern __inline __m128i
4842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4844{
4845 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4846 (__v8hi) __O, __M);
4847}
4848
4849extern __inline __m128i
4850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4851_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
4852{
4853 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4854 (__v8hi)
4855 _mm_setzero_si128 (),
4856 __M);
4857}
4858
4859extern __inline __m128i
4860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4861_mm512_cvtusepi64_epi16 (__m512i __A)
4862{
4863 __v8hi __O;
4864 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O,
4865 (__mmask8) -1);
4866}
4867
4868extern __inline __m128i
4869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4871{
4872 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4873 (__v8hi) __O, __M);
4874}
4875
4876extern __inline __m128i
4877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
4879{
4880 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4881 (__v8hi)
4882 _mm_setzero_si128 (),
4883 __M);
4884}
4885
4886extern __inline __m128i
4887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888_mm512_cvtepi64_epi8 (__m512i __A)
4889{
4890 __v16qi __O;
4891 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O,
4892 (__mmask8) -1);
4893}
4894
4895extern __inline __m128i
4896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4897_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
4898{
4899 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4900 (__v16qi) __O, __M);
4901}
4902
4903extern __inline __m128i
4904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4905_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
4906{
4907 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4908 (__v16qi)
4909 _mm_setzero_si128 (),
4910 __M);
4911}
4912
4913extern __inline __m128i
4914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915_mm512_cvtsepi64_epi8 (__m512i __A)
4916{
4917 __v16qi __O;
4918 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O,
4919 (__mmask8) -1);
4920}
4921
4922extern __inline __m128i
4923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4924_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
4925{
4926 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
4927 (__v16qi) __O, __M);
4928}
4929
4930extern __inline __m128i
4931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
4933{
4934 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
4935 (__v16qi)
4936 _mm_setzero_si128 (),
4937 __M);
4938}
4939
4940extern __inline __m128i
4941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4942_mm512_cvtusepi64_epi8 (__m512i __A)
4943{
4944 __v16qi __O;
4945 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O,
4946 (__mmask8) -1);
4947}
4948
4949extern __inline __m128i
4950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4951_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
4952{
4953 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
4954 (__v16qi) __O,
4955 __M);
4956}
4957
4958extern __inline __m128i
4959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4960_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
4961{
4962 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
4963 (__v16qi)
4964 _mm_setzero_si128 (),
4965 __M);
4966}
4967
4968extern __inline __m512d
4969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970_mm512_cvtepi32_pd (__m256i __A)
4971{
4972 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
4973 (__v8df)
4974 _mm512_setzero_pd (),
4975 (__mmask8) -1);
4976}
4977
4978extern __inline __m512d
4979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
4981{
4982 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
4983 (__v8df) __W,
4984 (__mmask8) __U);
4985}
4986
4987extern __inline __m512d
4988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
4990{
4991 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
4992 (__v8df)
4993 _mm512_setzero_pd (),
4994 (__mmask8) __U);
4995}
4996
4997extern __inline __m512d
4998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999_mm512_cvtepu32_pd (__m256i __A)
5000{
5001 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5002 (__v8df)
5003 _mm512_setzero_pd (),
5004 (__mmask8) -1);
5005}
5006
5007extern __inline __m512d
5008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5010{
5011 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5012 (__v8df) __W,
5013 (__mmask8) __U);
5014}
5015
5016extern __inline __m512d
5017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5019{
5020 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5021 (__v8df)
5022 _mm512_setzero_pd (),
5023 (__mmask8) __U);
5024}
5025
5026#ifdef __OPTIMIZE__
5027extern __inline __m512
5028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5030{
5031 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5032 (__v16sf)
5033 _mm512_setzero_ps (),
5034 (__mmask16) -1, __R);
5035}
5036
5037extern __inline __m512
5038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5039_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5040 const int __R)
5041{
5042 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5043 (__v16sf) __W,
5044 (__mmask16) __U, __R);
5045}
5046
5047extern __inline __m512
5048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5049_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5050{
5051 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5052 (__v16sf)
5053 _mm512_setzero_ps (),
5054 (__mmask16) __U, __R);
5055}
5056
5057extern __inline __m512
5058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5060{
5061 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5062 (__v16sf)
5063 _mm512_setzero_ps (),
5064 (__mmask16) -1, __R);
5065}
5066
5067extern __inline __m512
5068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5070 const int __R)
5071{
5072 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5073 (__v16sf) __W,
5074 (__mmask16) __U, __R);
5075}
5076
5077extern __inline __m512
5078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5079_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5080{
5081 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5082 (__v16sf)
5083 _mm512_setzero_ps (),
5084 (__mmask16) __U, __R);
5085}
5086
5087#else
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so it behaves as a single primary expression
   (e.g. under sizeof), and the mask and rounding operands carry the
   same casts as the sibling extract/insert macros.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) _mm512_setzero_ps (), \
      (__mmask16) -1, (int) (B)))
5090
/* Macro form used when __OPTIMIZE__ is not defined.  W, U and B were
   previously substituted bare; each argument is now parenthesized and
   cast to the type the inline form declares, and the whole expansion
   is parenthesized.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) (W), \
      (__mmask16) (U), (int) (B)))
5093
/* Macro form used when __OPTIMIZE__ is not defined.  U and B were
   previously substituted bare; they are now parenthesized and cast,
   and the whole expansion is parenthesized.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), (int) (B)))
5096
/* Macro form used when __OPTIMIZE__ is not defined.  The expansion is
   wrapped in parentheses so it behaves as a single primary expression,
   and the mask and rounding operands carry explicit casts.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) _mm512_setzero_ps (), \
      (__mmask16) -1, (int) (B)))
5099
/* Macro form used when __OPTIMIZE__ is not defined.  W, U and B were
   previously substituted bare; each argument is now parenthesized and
   cast to the type the inline form declares, and the whole expansion
   is parenthesized.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) (W), \
      (__mmask16) (U), (int) (B)))
5102
/* Macro form used when __OPTIMIZE__ is not defined.  U and B were
   previously substituted bare; they are now parenthesized and cast,
   and the whole expansion is parenthesized.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(__m512i) (A), \
      (__v16sf)(__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), (int) (B)))
5105#endif
5106
5107#ifdef __OPTIMIZE__
5108extern __inline __m256d
5109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5110_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5111{
5112 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5113 __imm,
5114 (__v4df)
5115 _mm256_setzero_pd (),
5116 (__mmask8) -1);
5117}
5118
5119extern __inline __m256d
5120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5122 const int __imm)
5123{
5124 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5125 __imm,
5126 (__v4df) __W,
5127 (__mmask8) __U);
5128}
5129
5130extern __inline __m256d
5131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5133{
5134 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5135 __imm,
5136 (__v4df)
5137 _mm256_setzero_pd (),
5138 (__mmask8) __U);
5139}
5140
5141extern __inline __m128
5142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5143_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5144{
5145 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5146 __imm,
5147 (__v4sf)
5148 _mm_setzero_ps (),
5149 (__mmask8) -1);
5150}
5151
5152extern __inline __m128
5153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5155 const int __imm)
5156{
5157 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5158 __imm,
5159 (__v4sf) __W,
5160 (__mmask8) __U);
5161}
5162
5163extern __inline __m128
5164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5165_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5166{
5167 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5168 __imm,
5169 (__v4sf)
5170 _mm_setzero_ps (),
5171 (__mmask8) __U);
5172}
5173
5174extern __inline __m256i
5175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5177{
5178 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5179 __imm,
5180 (__v4di)
5181 _mm256_setzero_si256 (),
5182 (__mmask8) -1);
5183}
5184
5185extern __inline __m256i
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5188 const int __imm)
5189{
5190 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5191 __imm,
5192 (__v4di) __W,
5193 (__mmask8) __U);
5194}
5195
5196extern __inline __m256i
5197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5199{
5200 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5201 __imm,
5202 (__v4di)
5203 _mm256_setzero_si256 (),
5204 (__mmask8) __U);
5205}
5206
5207extern __inline __m128i
5208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5210{
5211 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5212 __imm,
5213 (__v4si)
5214 _mm_setzero_si128 (),
5215 (__mmask8) -1);
5216}
5217
5218extern __inline __m128i
5219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5220_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5221 const int __imm)
5222{
5223 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5224 __imm,
5225 (__v4si) __W,
5226 (__mmask8) __U);
5227}
5228
5229extern __inline __m128i
5230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5231_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5232{
5233 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5234 __imm,
5235 (__v4si)
5236 _mm_setzero_si128 (),
5237 (__mmask8) __U);
5238}
5239#else
5240
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extractf64x4 builtin with a _mm256_setzero_pd () third operand.  */
#define _mm512_extractf64x4_pd(V, IMM) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), (int) (IMM), \
       (__v4df)(__m256d)_mm256_setzero_pd(), \
       (__mmask8)-1))
5246
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4
   builtin with W as third operand and U as mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, V, IMM) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), (int) (IMM), \
       (__v4df)(__m256d)(W), \
       (__mmask8)(U)))
5252
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4
   builtin with a _mm256_setzero_pd () third operand and U as mask.  */
#define _mm512_maskz_extractf64x4_pd(U, V, IMM) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (V), (int) (IMM), \
       (__v4df)(__m256d)_mm256_setzero_pd(), \
       (__mmask8)(U)))
5258
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extractf32x4 builtin with a _mm_setzero_ps () third operand.  */
#define _mm512_extractf32x4_ps(V, IMM) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), (int) (IMM), \
       (__v4sf)(__m128)_mm_setzero_ps(), \
       (__mmask8)-1))
5264
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4
   builtin with W as third operand and U as mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, V, IMM) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), (int) (IMM), \
       (__v4sf)(__m128)(W), \
       (__mmask8)(U)))
5270
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4
   builtin with a _mm_setzero_ps () third operand and U as mask.  */
#define _mm512_maskz_extractf32x4_ps(U, V, IMM) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (V), (int) (IMM), \
       (__v4sf)(__m128)_mm_setzero_ps(), \
       (__mmask8)(U)))
5276
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extracti64x4 builtin with a _mm256_setzero_si256 () third operand.  */
#define _mm512_extracti64x4_epi64(V, IMM) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), (int) (IMM), \
       (__v4di)(__m256i)_mm256_setzero_si256 (), \
       (__mmask8)-1))
5282
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4
   builtin with W as third operand and U as mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, V, IMM) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), (int) (IMM), \
       (__v4di)(__m256i)(W), \
       (__mmask8)(U)))
5288
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4
   builtin with a _mm256_setzero_si256 () third operand and U as
   mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, V, IMM) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (V), (int) (IMM), \
       (__v4di)(__m256i)_mm256_setzero_si256 (), \
       (__mmask8)(U)))
5294
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extracti32x4 builtin with a _mm_setzero_si128 () third operand.  */
#define _mm512_extracti32x4_epi32(V, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), (int) (IMM), \
       (__v4si)(__m128i)_mm_setzero_si128 (), \
       (__mmask8)-1))
5300
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4
   builtin with W as third operand and U as mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, V, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), (int) (IMM), \
       (__v4si)(__m128i)(W), \
       (__mmask8)(U)))
5306
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4
   builtin with a _mm_setzero_si128 () third operand and U as mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, V, IMM) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (V), (int) (IMM), \
       (__v4si)(__m128i)_mm_setzero_si128 (), \
       (__mmask8)(U)))
5312#endif
5313
5314#ifdef __OPTIMIZE__
5315extern __inline __m512i
5316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5317_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5318{
5319 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5320 (__v4si) __B,
5321 __imm,
5322 (__v16si) __A, -1);
5323}
5324
5325extern __inline __m512
5326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5328{
5329 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5330 (__v4sf) __B,
5331 __imm,
5332 (__v16sf) __A, -1);
5333}
5334
5335extern __inline __m512i
5336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5338{
5339 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5340 (__v4di) __B,
5341 __imm,
5342 (__v8di)
5343 _mm512_setzero_si512 (),
5344 (__mmask8) -1);
5345}
5346
5347extern __inline __m512i
5348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5350 __m256i __B, const int __imm)
5351{
5352 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5353 (__v4di) __B,
5354 __imm,
5355 (__v8di) __W,
5356 (__mmask8) __U);
5357}
5358
5359extern __inline __m512i
5360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5362 const int __imm)
5363{
5364 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5365 (__v4di) __B,
5366 __imm,
5367 (__v8di)
5368 _mm512_setzero_si512 (),
5369 (__mmask8) __U);
5370}
5371
5372extern __inline __m512d
5373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5374_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5375{
5376 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5377 (__v4df) __B,
5378 __imm,
5379 (__v8df)
5380 _mm512_setzero_pd (),
5381 (__mmask8) -1);
5382}
5383
5384extern __inline __m512d
5385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5386_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5387 __m256d __B, const int __imm)
5388{
5389 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5390 (__v4df) __B,
5391 __imm,
5392 (__v8df) __W,
5393 (__mmask8) __U);
5394}
5395
5396extern __inline __m512d
5397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5399 const int __imm)
5400{
5401 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5402 (__v4df) __B,
5403 __imm,
5404 (__v8df)
5405 _mm512_setzero_pd (),
5406 (__mmask8) __U);
5407}
5408#else
/* Macro form used when __OPTIMIZE__ is not defined.  V is expanded
   twice: as the first and as the fourth builtin operand.  */
#define _mm512_insertf32x4(V, SUB, IMM) \
  ((__m512) \
   __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (V), \
       (__v4sf)(__m128) (SUB), (int) (IMM), \
       (__v16sf)(__m512) (V), (__mmask16)(-1)))
5412
/* Macro form used when __OPTIMIZE__ is not defined.  V is expanded
   twice: as the first and as the fourth builtin operand.  */
#define _mm512_inserti32x4(V, SUB, IMM) \
  ((__m512i) \
   __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (V), \
       (__v4si)(__m128i) (SUB), (int) (IMM), \
       (__v16si)(__m512i) (V), (__mmask16)(-1)))
5416
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   insertf64x4 builtin with a _mm512_setzero_pd () fourth operand.  */
#define _mm512_insertf64x4(V, SUB, IMM) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
       (__v4df)(__m256d) (SUB), (int) (IMM), \
       (__v8df)(__m512d)_mm512_setzero_pd(), \
       (__mmask8)-1))
5422
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4
   builtin with W as fourth operand and U as mask.  */
#define _mm512_mask_insertf64x4(W, U, V, SUB, IMM) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
       (__v4df)(__m256d) (SUB), (int) (IMM), \
       (__v8df)(__m512d)(W), \
       (__mmask8)(U)))
5428
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4
   builtin with a _mm512_setzero_pd () fourth operand and U as mask.  */
#define _mm512_maskz_insertf64x4(U, V, SUB, IMM) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (V), \
       (__v4df)(__m256d) (SUB), (int) (IMM), \
       (__v8df)(__m512d)_mm512_setzero_pd(), \
       (__mmask8)(U)))
5434
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   inserti64x4 builtin with a _mm512_setzero_si512 () fourth operand.  */
#define _mm512_inserti64x4(V, SUB, IMM) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
       (__v4di)(__m256i) (SUB), (int) (IMM), \
       (__v8di)(__m512i)_mm512_setzero_si512 (), \
       (__mmask8)-1))
5440
/* Macro form used when __OPTIMIZE__ is not defined: inserti64x4
   builtin with W as fourth operand and U as mask.  */
#define _mm512_mask_inserti64x4(W, U, V, SUB, IMM) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
       (__v4di)(__m256i) (SUB), (int) (IMM), \
       (__v8di)(__m512i)(W), \
       (__mmask8)(U)))
5446
/* Macro form used when __OPTIMIZE__ is not defined: inserti64x4
   builtin with a _mm512_setzero_si512 () fourth operand and U as
   mask.  */
#define _mm512_maskz_inserti64x4(U, V, SUB, IMM) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (V), \
       (__v4di)(__m256i) (SUB), (int) (IMM), \
       (__v8di)(__m512i)_mm512_setzero_si512 (), \
       (__mmask8)(U)))
5452#endif
5453
5454extern __inline __m512d
5455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5456_mm512_loadu_pd (void const *__P)
5457{
5458 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5459 (__v8df)
5460 _mm512_setzero_pd (),
5461 (__mmask8) -1);
5462}
5463
5464extern __inline __m512d
5465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5467{
5468 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5469 (__v8df) __W,
5470 (__mmask8) __U);
5471}
5472
5473extern __inline __m512d
5474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5475_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5476{
5477 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5478 (__v8df)
5479 _mm512_setzero_pd (),
5480 (__mmask8) __U);
5481}
5482
5483extern __inline void
5484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5485_mm512_storeu_pd (void *__P, __m512d __A)
5486{
5487 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5488 (__mmask8) -1);
5489}
5490
5491extern __inline void
5492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5493_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5494{
5495 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5496 (__mmask8) __U);
5497}
5498
5499extern __inline __m512
5500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5501_mm512_loadu_ps (void const *__P)
5502{
5503 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5504 (__v16sf)
5505 _mm512_setzero_ps (),
5506 (__mmask16) -1);
5507}
5508
5509extern __inline __m512
5510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5511_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5512{
5513 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5514 (__v16sf) __W,
5515 (__mmask16) __U);
5516}
5517
5518extern __inline __m512
5519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5520_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5521{
5522 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5523 (__v16sf)
5524 _mm512_setzero_ps (),
5525 (__mmask16) __U);
5526}
5527
5528extern __inline void
5529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5530_mm512_storeu_ps (void *__P, __m512 __A)
5531{
5532 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5533 (__mmask16) -1);
5534}
5535
5536extern __inline void
5537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5538_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5539{
5540 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5541 (__mmask16) __U);
5542}
5543
5544extern __inline __m512i
5545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5546_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5547{
5548 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5549 (__v8di) __W,
5550 (__mmask8) __U);
5551}
5552
5553extern __inline __m512i
5554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5555_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5556{
5557 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5558 (__v8di)
5559 _mm512_setzero_si512 (),
5560 (__mmask8) __U);
5561}
5562
5563extern __inline void
5564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5565_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5566{
5567 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5568 (__mmask8) __U);
5569}
5570
5571extern __inline __m512i
5572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70473c63 5573_mm512_loadu_epi32 (void const *__P)
756c5857
AI
5574{
5575 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5576 (__v16si)
5577 _mm512_setzero_si512 (),
5578 (__mmask16) -1);
5579}
5580
5581extern __inline __m512i
5582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5583_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5584{
5585 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5586 (__v16si) __W,
5587 (__mmask16) __U);
5588}
5589
5590extern __inline __m512i
5591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5592_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5593{
5594 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5595 (__v16si)
5596 _mm512_setzero_si512 (),
5597 (__mmask16) __U);
5598}
5599
5600extern __inline void
5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70473c63 5602_mm512_storeu_epi32 (void *__P, __m512i __A)
756c5857
AI
5603{
5604 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5605 (__mmask16) -1);
5606}
5607
5608extern __inline void
5609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5611{
5612 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5613 (__mmask16) __U);
5614}
5615
5616extern __inline __m512d
5617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5618_mm512_permutevar_pd (__m512d __A, __m512i __C)
5619{
5620 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5621 (__v8di) __C,
5622 (__v8df)
5623 _mm512_setzero_pd (),
5624 (__mmask8) -1);
5625}
5626
5627extern __inline __m512d
5628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5630{
5631 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5632 (__v8di) __C,
5633 (__v8df) __W,
5634 (__mmask8) __U);
5635}
5636
5637extern __inline __m512d
5638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5640{
5641 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5642 (__v8di) __C,
5643 (__v8df)
5644 _mm512_setzero_pd (),
5645 (__mmask8) __U);
5646}
5647
5648extern __inline __m512
5649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5650_mm512_permutevar_ps (__m512 __A, __m512i __C)
5651{
5652 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5653 (__v16si) __C,
5654 (__v16sf)
5655 _mm512_setzero_ps (),
5656 (__mmask16) -1);
5657}
5658
5659extern __inline __m512
5660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5662{
5663 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5664 (__v16si) __C,
5665 (__v16sf) __W,
5666 (__mmask16) __U);
5667}
5668
5669extern __inline __m512
5670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5672{
5673 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5674 (__v16si) __C,
5675 (__v16sf)
5676 _mm512_setzero_ps (),
5677 (__mmask16) __U);
5678}
5679
5680extern __inline __m512i
5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5683{
5684 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5685 /* idx */ ,
5686 (__v8di) __A,
5687 (__v8di) __B,
5688 (__mmask8) -1);
5689}
5690
5691extern __inline __m512i
5692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5693_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5694 __m512i __B)
5695{
5696 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5697 /* idx */ ,
5698 (__v8di) __A,
5699 (__v8di) __B,
5700 (__mmask8) __U);
5701}
5702
5703extern __inline __m512i
5704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5706 __mmask8 __U, __m512i __B)
5707{
5708 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5709 (__v8di) __I
5710 /* idx */ ,
5711 (__v8di) __B,
5712 (__mmask8) __U);
5713}
5714
5715extern __inline __m512i
5716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5718 __m512i __I, __m512i __B)
5719{
5720 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5721 /* idx */ ,
5722 (__v8di) __A,
5723 (__v8di) __B,
5724 (__mmask8) __U);
5725}
5726
5727extern __inline __m512i
5728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5730{
5731 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5732 /* idx */ ,
5733 (__v16si) __A,
5734 (__v16si) __B,
5735 (__mmask16) -1);
5736}
5737
5738extern __inline __m512i
5739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5740_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5741 __m512i __I, __m512i __B)
5742{
5743 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5744 /* idx */ ,
5745 (__v16si) __A,
5746 (__v16si) __B,
5747 (__mmask16) __U);
5748}
5749
5750extern __inline __m512i
5751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5753 __mmask16 __U, __m512i __B)
5754{
5755 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5756 (__v16si) __I
5757 /* idx */ ,
5758 (__v16si) __B,
5759 (__mmask16) __U);
5760}
5761
5762extern __inline __m512i
5763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5764_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5765 __m512i __I, __m512i __B)
5766{
5767 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5768 /* idx */ ,
5769 (__v16si) __A,
5770 (__v16si) __B,
5771 (__mmask16) __U);
5772}
5773
5774extern __inline __m512d
5775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5776_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5777{
5778 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5779 /* idx */ ,
5780 (__v8df) __A,
5781 (__v8df) __B,
5782 (__mmask8) -1);
5783}
5784
5785extern __inline __m512d
5786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5787_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
5788 __m512d __B)
5789{
5790 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5791 /* idx */ ,
5792 (__v8df) __A,
5793 (__v8df) __B,
5794 (__mmask8) __U);
5795}
5796
5797extern __inline __m512d
5798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
5800 __m512d __B)
5801{
5802 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
5803 (__v8di) __I
5804 /* idx */ ,
5805 (__v8df) __B,
5806 (__mmask8) __U);
5807}
5808
5809extern __inline __m512d
5810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
5812 __m512d __B)
5813{
5814 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
5815 /* idx */ ,
5816 (__v8df) __A,
5817 (__v8df) __B,
5818 (__mmask8) __U);
5819}
5820
5821extern __inline __m512
5822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5823_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
5824{
5825 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5826 /* idx */ ,
5827 (__v16sf) __A,
5828 (__v16sf) __B,
5829 (__mmask16) -1);
5830}
5831
5832extern __inline __m512
5833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
5835{
5836 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5837 /* idx */ ,
5838 (__v16sf) __A,
5839 (__v16sf) __B,
5840 (__mmask16) __U);
5841}
5842
5843extern __inline __m512
5844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
5846 __m512 __B)
5847{
5848 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
5849 (__v16si) __I
5850 /* idx */ ,
5851 (__v16sf) __B,
5852 (__mmask16) __U);
5853}
5854
5855extern __inline __m512
5856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5857_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
5858 __m512 __B)
5859{
5860 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
5861 /* idx */ ,
5862 (__v16sf) __A,
5863 (__v16sf) __B,
5864 (__mmask16) __U);
5865}
5866
5867#ifdef __OPTIMIZE__
5868extern __inline __m512d
5869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5870_mm512_permute_pd (__m512d __X, const int __C)
5871{
5872 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5873 (__v8df)
5874 _mm512_setzero_pd (),
5875 (__mmask8) -1);
5876}
5877
5878extern __inline __m512d
5879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5880_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
5881{
5882 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5883 (__v8df) __W,
5884 (__mmask8) __U);
5885}
5886
5887extern __inline __m512d
5888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
5890{
5891 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5892 (__v8df)
5893 _mm512_setzero_pd (),
5894 (__mmask8) __U);
5895}
5896
5897extern __inline __m512
5898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5899_mm512_permute_ps (__m512 __X, const int __C)
5900{
5901 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
5902 (__v16sf)
5903 _mm512_setzero_ps (),
5904 (__mmask16) -1);
5905}
5906
5907extern __inline __m512
5908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5909_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
5910{
5911 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
5912 (__v16sf) __W,
5913 (__mmask16) __U);
5914}
5915
5916extern __inline __m512
5917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5918_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
5919{
5920 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
5921 (__v16sf)
5922 _mm512_setzero_ps (),
5923 (__mmask16) __U);
5924}
5925#else
/* Macro form of _mm512_permute_pd, used when __OPTIMIZE__ is not defined.
   The third builtin argument is _mm512_setzero_pd (), matching the inline
   definition; passing (X) there a second time expanded the macro argument
   twice, so an X with side effects was evaluated twice.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(-1)))
5930
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, C, W and
   U, in that order, to __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
5935
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, C, a zero
   vector and U to __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
5940
/* Macro form of _mm512_permute_ps, used when __OPTIMIZE__ is not defined.
   The third builtin argument is _mm512_setzero_ps (), matching the inline
   definition; passing (X) there a second time expanded the macro argument
   twice, so an X with side effects was evaluated twice.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(-1)))
5945
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, C, W and
   U, in that order, to __builtin_ia32_vpermilps512_mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))
5950
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, C, a zero
   vector and U to __builtin_ia32_vpermilps512_mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
5955#endif
5956
5957#ifdef __OPTIMIZE__
5958extern __inline __m512i
5959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5960_mm512_permutex_epi64 (__m512i __X, const int __I)
5961{
5962 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
5963 (__v8di)
5964 _mm512_setzero_si512 (),
5965 (__mmask8) (-1));
5966}
5967
5968extern __inline __m512i
5969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
5971 __m512i __X, const int __I)
5972{
5973 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
5974 (__v8di) __W,
5975 (__mmask8) __M);
5976}
5977
5978extern __inline __m512i
5979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
5981{
5982 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
5983 (__v8di)
5984 _mm512_setzero_si512 (),
5985 (__mmask8) __M);
5986}
5987
5988extern __inline __m512d
5989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5990_mm512_permutex_pd (__m512d __X, const int __M)
5991{
5992 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
5993 (__v8df)
5994 _mm512_setzero_pd (),
5995 (__mmask8) -1);
5996}
5997
5998extern __inline __m512d
5999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6000_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6001{
6002 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6003 (__v8df) __W,
6004 (__mmask8) __U);
6005}
6006
6007extern __inline __m512d
6008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6009_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6010{
6011 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6012 (__v8df)
6013 _mm512_setzero_pd (),
6014 (__mmask8) __U);
6015}
6016#else
/* Macro form of _mm512_permutex_pd, used when __OPTIMIZE__ is not defined.
   The third builtin argument is _mm512_setzero_pd (), matching the inline
   definition; passing (X) there a second time expanded the macro argument
   twice, so an X with side effects was evaluated twice.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)-1))
6020
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, M, W and
   U, in that order, to __builtin_ia32_permdf512_mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
6024
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, M, a zero
   vector and U to __builtin_ia32_permdf512_mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
6029
/* Macro form of _mm512_permutex_epi64, used when __OPTIMIZE__ is not
   defined.  The third builtin argument is _mm512_setzero_si512 (), matching
   the inline definition; passing (X) there a second time expanded the macro
   argument twice, so an X with side effects was evaluated twice.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(_mm512_setzero_si512 ()), \
    (__mmask8)(-1)))
6035
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, I, a zero
   vector and M to __builtin_ia32_permdi512_mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(_mm512_setzero_si512 ()), \
    (__mmask8)(M)))
6042
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, I, W and
   M, in that order, to __builtin_ia32_permdi512_mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(M)))
6048#endif
6049
6050extern __inline __m512i
6051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6052_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6053{
6054 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6055 (__v8di) __Y,
6056 (__v8di)
6057 _mm512_setzero_si512 (),
6058 __M);
6059}
6060
6061extern __inline __m512i
6062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6063_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6064{
6065 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6066 (__v8di) __Y,
6067 (__v8di)
6068 _mm512_setzero_si512 (),
6069 (__mmask8) -1);
6070}
6071
6072extern __inline __m512i
6073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6075 __m512i __Y)
6076{
6077 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
6078 (__v8di) __Y,
6079 (__v8di) __W,
6080 __M);
6081}
6082
6083extern __inline __m512i
6084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6086{
6087 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6088 (__v16si) __Y,
6089 (__v16si)
6090 _mm512_setzero_si512 (),
6091 __M);
6092}
6093
6094extern __inline __m512i
6095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6097{
6098 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6099 (__v16si) __Y,
6100 (__v16si)
6101 _mm512_setzero_si512 (),
6102 (__mmask16) -1);
6103}
6104
6105extern __inline __m512i
6106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6107_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6108 __m512i __Y)
6109{
6110 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
6111 (__v16si) __Y,
6112 (__v16si) __W,
6113 __M);
6114}
6115
6116extern __inline __m512d
6117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6118_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6119{
6120 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6121 (__v8di) __X,
6122 (__v8df)
6123 _mm512_setzero_pd (),
6124 (__mmask8) -1);
6125}
6126
6127extern __inline __m512d
6128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6129_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6130{
6131 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6132 (__v8di) __X,
6133 (__v8df) __W,
6134 (__mmask8) __U);
6135}
6136
6137extern __inline __m512d
6138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6139_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6140{
6141 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6142 (__v8di) __X,
6143 (__v8df)
6144 _mm512_setzero_pd (),
6145 (__mmask8) __U);
6146}
6147
6148extern __inline __m512
6149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6150_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6151{
6152 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6153 (__v16si) __X,
6154 (__v16sf)
6155 _mm512_setzero_ps (),
6156 (__mmask16) -1);
6157}
6158
6159extern __inline __m512
6160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6161_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6162{
6163 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6164 (__v16si) __X,
6165 (__v16sf) __W,
6166 (__mmask16) __U);
6167}
6168
6169extern __inline __m512
6170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6172{
6173 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6174 (__v16si) __X,
6175 (__v16sf)
6176 _mm512_setzero_ps (),
6177 (__mmask16) __U);
6178}
6179
6180#ifdef __OPTIMIZE__
6181extern __inline __m512
6182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6183_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6184{
6185 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6186 (__v16sf) __V, __imm,
6187 (__v16sf)
6188 _mm512_setzero_ps (),
6189 (__mmask16) -1);
6190}
6191
6192extern __inline __m512
6193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6194_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6195 __m512 __V, const int __imm)
6196{
6197 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6198 (__v16sf) __V, __imm,
6199 (__v16sf) __W,
6200 (__mmask16) __U);
6201}
6202
6203extern __inline __m512
6204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6205_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6206{
6207 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6208 (__v16sf) __V, __imm,
6209 (__v16sf)
6210 _mm512_setzero_ps (),
6211 (__mmask16) __U);
6212}
6213
6214extern __inline __m512d
6215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6217{
6218 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6219 (__v8df) __V, __imm,
6220 (__v8df)
6221 _mm512_setzero_pd (),
6222 (__mmask8) -1);
6223}
6224
6225extern __inline __m512d
6226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6227_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6228 __m512d __V, const int __imm)
6229{
6230 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6231 (__v8df) __V, __imm,
6232 (__v8df) __W,
6233 (__mmask8) __U);
6234}
6235
6236extern __inline __m512d
6237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6238_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6239 const int __imm)
6240{
6241 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6242 (__v8df) __V, __imm,
6243 (__v8df)
6244 _mm512_setzero_pd (),
6245 (__mmask8) __U);
6246}
6247
6248extern __inline __m512d
6249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6251 const int __imm, const int __R)
6252{
6253 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6254 (__v8df) __B,
6255 (__v8di) __C,
6256 __imm,
6257 (__mmask8) -1, __R);
6258}
6259
6260extern __inline __m512d
6261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6262_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6263 __m512i __C, const int __imm, const int __R)
6264{
6265 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6266 (__v8df) __B,
6267 (__v8di) __C,
6268 __imm,
6269 (__mmask8) __U, __R);
6270}
6271
6272extern __inline __m512d
6273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6275 __m512i __C, const int __imm, const int __R)
6276{
6277 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6278 (__v8df) __B,
6279 (__v8di) __C,
6280 __imm,
6281 (__mmask8) __U, __R);
6282}
6283
6284extern __inline __m512
6285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6286_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6287 const int __imm, const int __R)
6288{
6289 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6290 (__v16sf) __B,
6291 (__v16si) __C,
6292 __imm,
6293 (__mmask16) -1, __R);
6294}
6295
6296extern __inline __m512
6297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6298_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6299 __m512i __C, const int __imm, const int __R)
6300{
6301 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6302 (__v16sf) __B,
6303 (__v16si) __C,
6304 __imm,
6305 (__mmask16) __U, __R);
6306}
6307
6308extern __inline __m512
6309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6310_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6311 __m512i __C, const int __imm, const int __R)
6312{
6313 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6314 (__v16sf) __B,
6315 (__v16si) __C,
6316 __imm,
6317 (__mmask16) __U, __R);
6318}
6319
6320extern __inline __m128d
6321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6322_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6323 const int __imm, const int __R)
6324{
6325 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6326 (__v2df) __B,
6327 (__v2di) __C, __imm,
6328 (__mmask8) -1, __R);
6329}
6330
6331extern __inline __m128d
6332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6333_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6334 __m128i __C, const int __imm, const int __R)
6335{
6336 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6337 (__v2df) __B,
6338 (__v2di) __C, __imm,
6339 (__mmask8) __U, __R);
6340}
6341
6342extern __inline __m128d
6343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6344_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6345 __m128i __C, const int __imm, const int __R)
6346{
6347 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6348 (__v2df) __B,
6349 (__v2di) __C,
6350 __imm,
6351 (__mmask8) __U, __R);
6352}
6353
6354extern __inline __m128
6355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6356_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6357 const int __imm, const int __R)
6358{
6359 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6360 (__v4sf) __B,
6361 (__v4si) __C, __imm,
6362 (__mmask8) -1, __R);
6363}
6364
6365extern __inline __m128
6366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6367_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6368 __m128i __C, const int __imm, const int __R)
6369{
6370 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6371 (__v4sf) __B,
6372 (__v4si) __C, __imm,
6373 (__mmask8) __U, __R);
6374}
6375
6376extern __inline __m128
6377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6378_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6379 __m128i __C, const int __imm, const int __R)
6380{
6381 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6382 (__v4sf) __B,
6383 (__v4si) __C, __imm,
6384 (__mmask8) __U, __R);
6385}
6386
6387#else
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, a
   zero vector and an all-ones mask to __builtin_ia32_shufpd512_mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)-1))
6393
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, W
   and U, in that order, to __builtin_ia32_shufpd512_mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
6399
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, a
   zero vector and U to __builtin_ia32_shufpd512_mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
6405
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, a
   zero vector and an all-ones mask to __builtin_ia32_shufps512_mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)-1))
6411
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, W
   and U, in that order, to __builtin_ia32_shufps512_mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))
6417
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, C, a
   zero vector and U to __builtin_ia32_shufps512_mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
6423
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   an all-ones mask and R to __builtin_ia32_fixupimmpd512_mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    (R)))
6428
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to __builtin_ia32_fixupimmpd512_mask.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6433
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to the _maskz builtin __builtin_ia32_fixupimmpd512_maskz.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (__v8di)(__m512i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6438
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   an all-ones mask and R to __builtin_ia32_fixupimmps512_mask.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(-1), \
    (R)))
6443
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to __builtin_ia32_fixupimmps512_mask.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), \
    (R)))
6448
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to the _maskz builtin __builtin_ia32_fixupimmps512_maskz.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (__v16si)(__m512i)(Z), \
    (int)(C), \
    (__mmask16)(U), \
    (R)))
6453
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   an all-ones mask and R to __builtin_ia32_fixupimmsd_mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    (R)))
6458
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to __builtin_ia32_fixupimmsd_mask.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6463
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to the _maskz builtin __builtin_ia32_fixupimmsd_maskz.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (__v2di)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6468
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   an all-ones mask and R to __builtin_ia32_fixupimmss_mask.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(-1), \
    (R)))
6473
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to __builtin_ia32_fixupimmss_mask.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6478
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X, Y, Z, C,
   mask U and R to the _maskz builtin __builtin_ia32_fixupimmss_maskz.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (__v4si)(__m128i)(Z), \
    (int)(C), \
    (__mmask8)(U), \
    (R)))
6483#endif
6484
6485extern __inline __m512
6486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6487_mm512_movehdup_ps (__m512 __A)
6488{
6489 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6490 (__v16sf)
6491 _mm512_setzero_ps (),
6492 (__mmask16) -1);
6493}
6494
6495extern __inline __m512
6496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6497_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6498{
6499 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6500 (__v16sf) __W,
6501 (__mmask16) __U);
6502}
6503
6504extern __inline __m512
6505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6507{
6508 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6509 (__v16sf)
6510 _mm512_setzero_ps (),
6511 (__mmask16) __U);
6512}
6513
6514extern __inline __m512
6515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516_mm512_moveldup_ps (__m512 __A)
6517{
6518 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6519 (__v16sf)
6520 _mm512_setzero_ps (),
6521 (__mmask16) -1);
6522}
6523
6524extern __inline __m512
6525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6526_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6527{
6528 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6529 (__v16sf) __W,
6530 (__mmask16) __U);
6531}
6532
6533extern __inline __m512
6534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6535_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6536{
6537 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6538 (__v16sf)
6539 _mm512_setzero_ps (),
6540 (__mmask16) __U);
6541}
6542
6543extern __inline __m512i
6544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6545_mm512_or_si512 (__m512i __A, __m512i __B)
6546{
6547 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6548 (__v16si) __B,
6549 (__v16si)
6550 _mm512_setzero_si512 (),
6551 (__mmask16) -1);
6552}
6553
6554extern __inline __m512i
6555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556_mm512_or_epi32 (__m512i __A, __m512i __B)
6557{
6558 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6559 (__v16si) __B,
6560 (__v16si)
6561 _mm512_setzero_si512 (),
6562 (__mmask16) -1);
6563}
6564
6565extern __inline __m512i
6566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6568{
6569 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6570 (__v16si) __B,
6571 (__v16si) __W,
6572 (__mmask16) __U);
6573}
6574
6575extern __inline __m512i
6576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6577_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6578{
6579 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6580 (__v16si) __B,
6581 (__v16si)
6582 _mm512_setzero_si512 (),
6583 (__mmask16) __U);
6584}
6585
6586extern __inline __m512i
6587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6588_mm512_or_epi64 (__m512i __A, __m512i __B)
6589{
6590 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6591 (__v8di) __B,
6592 (__v8di)
6593 _mm512_setzero_si512 (),
6594 (__mmask8) -1);
6595}
6596
6597extern __inline __m512i
6598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6599_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6600{
6601 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6602 (__v8di) __B,
6603 (__v8di) __W,
6604 (__mmask8) __U);
6605}
6606
6607extern __inline __m512i
6608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6609_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6610{
6611 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6612 (__v8di) __B,
6613 (__v8di)
6614 _mm512_setzero_si512 (),
6615 (__mmask8) __U);
6616}
6617
6618extern __inline __m512i
6619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6620_mm512_xor_si512 (__m512i __A, __m512i __B)
6621{
6622 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6623 (__v16si) __B,
6624 (__v16si)
6625 _mm512_setzero_si512 (),
6626 (__mmask16) -1);
6627}
6628
6629extern __inline __m512i
6630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6631_mm512_xor_epi32 (__m512i __A, __m512i __B)
6632{
6633 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6634 (__v16si) __B,
6635 (__v16si)
6636 _mm512_setzero_si512 (),
6637 (__mmask16) -1);
6638}
6639
6640extern __inline __m512i
6641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6642_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6643{
6644 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6645 (__v16si) __B,
6646 (__v16si) __W,
6647 (__mmask16) __U);
6648}
6649
6650extern __inline __m512i
6651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6653{
6654 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6655 (__v16si) __B,
6656 (__v16si)
6657 _mm512_setzero_si512 (),
6658 (__mmask16) __U);
6659}
6660
6661extern __inline __m512i
6662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6663_mm512_xor_epi64 (__m512i __A, __m512i __B)
6664{
6665 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6666 (__v8di) __B,
6667 (__v8di)
6668 _mm512_setzero_si512 (),
6669 (__mmask8) -1);
6670}
6671
6672extern __inline __m512i
6673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6674_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6675{
6676 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6677 (__v8di) __B,
6678 (__v8di) __W,
6679 (__mmask8) __U);
6680}
6681
6682extern __inline __m512i
6683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6684_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6685{
6686 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6687 (__v8di) __B,
6688 (__v8di)
6689 _mm512_setzero_si512 (),
6690 (__mmask8) __U);
6691}
6692
6693#ifdef __OPTIMIZE__
6694extern __inline __m512i
6695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6696_mm512_rol_epi32 (__m512i __A, const int __B)
6697{
6698 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6699 (__v16si)
6700 _mm512_setzero_si512 (),
6701 (__mmask16) -1);
6702}
6703
6704extern __inline __m512i
6705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6706_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6707{
6708 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6709 (__v16si) __W,
6710 (__mmask16) __U);
6711}
6712
6713extern __inline __m512i
6714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6716{
6717 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6718 (__v16si)
6719 _mm512_setzero_si512 (),
6720 (__mmask16) __U);
6721}
6722
6723extern __inline __m512i
6724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6725_mm512_ror_epi32 (__m512i __A, int __B)
6726{
6727 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6728 (__v16si)
6729 _mm512_setzero_si512 (),
6730 (__mmask16) -1);
6731}
6732
6733extern __inline __m512i
6734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6736{
6737 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6738 (__v16si) __W,
6739 (__mmask16) __U);
6740}
6741
6742extern __inline __m512i
6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6745{
6746 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6747 (__v16si)
6748 _mm512_setzero_si512 (),
6749 (__mmask16) __U);
6750}
6751
6752extern __inline __m512i
6753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6754_mm512_rol_epi64 (__m512i __A, const int __B)
6755{
6756 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6757 (__v8di)
6758 _mm512_setzero_si512 (),
6759 (__mmask8) -1);
6760}
6761
6762extern __inline __m512i
6763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6764_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6765{
6766 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6767 (__v8di) __W,
6768 (__mmask8) __U);
6769}
6770
6771extern __inline __m512i
6772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6773_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6774{
6775 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6776 (__v8di)
6777 _mm512_setzero_si512 (),
6778 (__mmask8) __U);
6779}
6780
6781extern __inline __m512i
6782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6783_mm512_ror_epi64 (__m512i __A, int __B)
6784{
6785 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6786 (__v8di)
6787 _mm512_setzero_si512 (),
6788 (__mmask8) -1);
6789}
6790
6791extern __inline __m512i
6792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6793_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6794{
6795 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6796 (__v8di) __W,
6797 (__mmask8) __U);
6798}
6799
6800extern __inline __m512i
6801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6802_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6803{
6804 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6805 (__v8di)
6806 _mm512_setzero_si512 (),
6807 (__mmask8) __U);
6808}
6809
6810#else
/* Macro forms of the 32- and 64-bit rotate intrinsics for the #else branch
   of the conditional opened above.  Each forwards to the same builtin,
   with the same casts, pass-through operand and mask, as the inline
   function of the same name.  */

/* 32-bit rotate left.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)_mm512_setzero_si512 (), (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)(__m512i)(W), (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)_mm512_setzero_si512 (), (__mmask16)(U)))

/* 32-bit rotate right.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)_mm512_setzero_si512 (), (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)(__m512i)(W), (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), (int)(B), \
    (__v16si)_mm512_setzero_si512 (), (__mmask16)(U)))

/* 64-bit rotate left.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)_mm512_setzero_si512 (), (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)(__m512i)(W), (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)_mm512_setzero_si512 (), (__mmask8)(U)))

/* 64-bit rotate right.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)_mm512_setzero_si512 (), (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)(__m512i)(W), (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), (int)(B), \
    (__v8di)_mm512_setzero_si512 (), (__mmask8)(U)))
6872#endif
6873
6874extern __inline __m512i
6875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6876_mm512_and_si512 (__m512i __A, __m512i __B)
6877{
6878 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6879 (__v16si) __B,
6880 (__v16si)
6881 _mm512_setzero_si512 (),
6882 (__mmask16) -1);
6883}
6884
6885extern __inline __m512i
6886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6887_mm512_and_epi32 (__m512i __A, __m512i __B)
6888{
6889 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6890 (__v16si) __B,
6891 (__v16si)
6892 _mm512_setzero_si512 (),
6893 (__mmask16) -1);
6894}
6895
6896extern __inline __m512i
6897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6898_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6899{
6900 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6901 (__v16si) __B,
6902 (__v16si) __W,
6903 (__mmask16) __U);
6904}
6905
6906extern __inline __m512i
6907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6908_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6909{
6910 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6911 (__v16si) __B,
6912 (__v16si)
6913 _mm512_setzero_si512 (),
6914 (__mmask16) __U);
6915}
6916
6917extern __inline __m512i
6918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919_mm512_and_epi64 (__m512i __A, __m512i __B)
6920{
6921 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
6922 (__v8di) __B,
6923 (__v8di)
6924 _mm512_setzero_si512 (),
6925 (__mmask8) -1);
6926}
6927
6928extern __inline __m512i
6929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6930_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6931{
6932 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
6933 (__v8di) __B,
6934 (__v8di) __W, __U);
6935}
6936
6937extern __inline __m512i
6938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6939_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6940{
6941 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
6942 (__v8di) __B,
6943 (__v8di)
6944 _mm512_setzero_pd (),
6945 __U);
6946}
6947
6948extern __inline __m512i
6949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6950_mm512_andnot_si512 (__m512i __A, __m512i __B)
6951{
6952 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
6953 (__v16si) __B,
6954 (__v16si)
6955 _mm512_setzero_si512 (),
6956 (__mmask16) -1);
6957}
6958
6959extern __inline __m512i
6960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6961_mm512_andnot_epi32 (__m512i __A, __m512i __B)
6962{
6963 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
6964 (__v16si) __B,
6965 (__v16si)
6966 _mm512_setzero_si512 (),
6967 (__mmask16) -1);
6968}
6969
6970extern __inline __m512i
6971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6972_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6973{
6974 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
6975 (__v16si) __B,
6976 (__v16si) __W,
6977 (__mmask16) __U);
6978}
6979
6980extern __inline __m512i
6981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6982_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6983{
6984 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
6985 (__v16si) __B,
6986 (__v16si)
6987 _mm512_setzero_si512 (),
6988 (__mmask16) __U);
6989}
6990
6991extern __inline __m512i
6992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6993_mm512_andnot_epi64 (__m512i __A, __m512i __B)
6994{
6995 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
6996 (__v8di) __B,
6997 (__v8di)
6998 _mm512_setzero_si512 (),
6999 (__mmask8) -1);
7000}
7001
7002extern __inline __m512i
7003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7004_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7005{
7006 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7007 (__v8di) __B,
7008 (__v8di) __W, __U);
7009}
7010
7011extern __inline __m512i
7012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7013_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7014{
7015 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7016 (__v8di) __B,
7017 (__v8di)
7018 _mm512_setzero_pd (),
7019 __U);
7020}
7021
7022extern __inline __mmask16
7023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7024_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7025{
7026 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7027 (__v16si) __B,
7028 (__mmask16) -1);
7029}
7030
7031extern __inline __mmask16
7032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7033_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7034{
7035 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7036 (__v16si) __B, __U);
7037}
7038
7039extern __inline __mmask8
7040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7042{
7043 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7044 (__v8di) __B,
7045 (__mmask8) -1);
7046}
7047
7048extern __inline __mmask8
7049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7051{
7052 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7053}
7054
7055extern __inline __m512i
7056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7057_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7058{
7059 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7060 (__v16si) __B,
7061 (__v16si)
7062 _mm512_setzero_si512 (),
7063 (__mmask16) -1);
7064}
7065
7066extern __inline __m512i
7067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7068_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7069 __m512i __B)
7070{
7071 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7072 (__v16si) __B,
7073 (__v16si) __W,
7074 (__mmask16) __U);
7075}
7076
7077extern __inline __m512i
7078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7080{
7081 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7082 (__v16si) __B,
7083 (__v16si)
7084 _mm512_setzero_si512 (),
7085 (__mmask16) __U);
7086}
7087
7088extern __inline __m512i
7089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7091{
7092 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7093 (__v8di) __B,
7094 (__v8di)
7095 _mm512_setzero_si512 (),
7096 (__mmask8) -1);
7097}
7098
7099extern __inline __m512i
7100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7102{
7103 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7104 (__v8di) __B,
7105 (__v8di) __W,
7106 (__mmask8) __U);
7107}
7108
7109extern __inline __m512i
7110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7111_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7112{
7113 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7114 (__v8di) __B,
7115 (__v8di)
7116 _mm512_setzero_si512 (),
7117 (__mmask8) __U);
7118}
7119
7120extern __inline __m512i
7121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7122_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7123{
7124 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7125 (__v16si) __B,
7126 (__v16si)
7127 _mm512_setzero_si512 (),
7128 (__mmask16) -1);
7129}
7130
7131extern __inline __m512i
7132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7133_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7134 __m512i __B)
7135{
7136 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7137 (__v16si) __B,
7138 (__v16si) __W,
7139 (__mmask16) __U);
7140}
7141
7142extern __inline __m512i
7143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7144_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7145{
7146 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7147 (__v16si) __B,
7148 (__v16si)
7149 _mm512_setzero_si512 (),
7150 (__mmask16) __U);
7151}
7152
7153extern __inline __m512i
7154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7155_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7156{
7157 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7158 (__v8di) __B,
7159 (__v8di)
7160 _mm512_setzero_si512 (),
7161 (__mmask8) -1);
7162}
7163
7164extern __inline __m512i
7165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7167{
7168 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7169 (__v8di) __B,
7170 (__v8di) __W,
7171 (__mmask8) __U);
7172}
7173
7174extern __inline __m512i
7175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7177{
7178 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7179 (__v8di) __B,
7180 (__v8di)
7181 _mm512_setzero_si512 (),
7182 (__mmask8) __U);
7183}
7184
7185#ifdef __x86_64__
7186#ifdef __OPTIMIZE__
7187extern __inline unsigned long long
7188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7189_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7190{
7191 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7192}
7193
7194extern __inline long long
7195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7196_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7197{
7198 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7199}
7200
7201extern __inline long long
7202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7203_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7204{
7205 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7206}
7207
7208extern __inline unsigned long long
7209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7210_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7211{
7212 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7213}
7214
7215extern __inline long long
7216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7217_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7218{
7219 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7220}
7221
7222extern __inline long long
7223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7224_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7225{
7226 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7227}
7228#else
/* Macro forms of the float -> 64-bit integer conversions for builds
   without __OPTIMIZE__.  A is the __m128 source, B the immediate.  */

/* Conversions that honour the rounding control in B.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
#define _mm_cvt_roundss_si64(A, B) \
  ((long long)__builtin_ia32_vcvtss2si64(A, B))
#define _mm_cvt_roundss_i64(A, B) \
  ((long long)__builtin_ia32_vcvtss2si64(A, B))

/* Truncating conversions.  */
#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
#define _mm_cvtt_roundss_i64(A, B) \
  ((long long)__builtin_ia32_vcvttss2si64(A, B))
#define _mm_cvtt_roundss_si64(A, B) \
  ((long long)__builtin_ia32_vcvttss2si64(A, B))
7246#endif
7247#endif
7248
7249#ifdef __OPTIMIZE__
7250extern __inline unsigned
7251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252_mm_cvt_roundss_u32 (__m128 __A, const int __R)
7253{
7254 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7255}
7256
7257extern __inline int
7258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7259_mm_cvt_roundss_si32 (__m128 __A, const int __R)
7260{
7261 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7262}
7263
7264extern __inline int
7265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7266_mm_cvt_roundss_i32 (__m128 __A, const int __R)
7267{
7268 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7269}
7270
7271extern __inline unsigned
7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7274{
7275 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7276}
7277
7278extern __inline int
7279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7281{
7282 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7283}
7284
7285extern __inline int
7286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7287_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7288{
7289 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7290}
7291#else
/* Macro forms of the float -> 32-bit integer conversions for builds
   without __OPTIMIZE__.  A is the __m128 source, B the immediate.  */

/* Conversions that honour the rounding control in B.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
#define _mm_cvt_roundss_si32(A, B) \
  ((int)__builtin_ia32_vcvtss2si32(A, B))
#define _mm_cvt_roundss_i32(A, B) \
  ((int)__builtin_ia32_vcvtss2si32(A, B))

/* Truncating conversions.  */
#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
#define _mm_cvtt_roundss_si32(A, B) \
  ((int)__builtin_ia32_vcvttss2si32(A, B))
#define _mm_cvtt_roundss_i32(A, B) \
  ((int)__builtin_ia32_vcvttss2si32(A, B))
7309#endif
7310
7311#ifdef __x86_64__
7312#ifdef __OPTIMIZE__
7313extern __inline unsigned long long
7314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7316{
7317 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7318}
7319
7320extern __inline long long
7321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7323{
7324 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7325}
7326
7327extern __inline long long
7328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7329_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7330{
7331 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7332}
7333
7334extern __inline unsigned long long
7335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7337{
7338 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7339}
7340
7341extern __inline long long
7342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7343_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7344{
7345 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7346}
7347
7348extern __inline long long
7349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7351{
7352 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7353}
7354#else
/* Macro forms of the double -> 64-bit integer conversions for builds
   without __OPTIMIZE__.  A is the __m128d source, B the immediate.  */

/* Conversions that honour the rounding control in B.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
#define _mm_cvt_roundsd_si64(A, B) \
  ((long long)__builtin_ia32_vcvtsd2si64(A, B))
#define _mm_cvt_roundsd_i64(A, B) \
  ((long long)__builtin_ia32_vcvtsd2si64(A, B))

/* Truncating conversions.  */
#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long)__builtin_ia32_vcvttsd2si64(A, B))
#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7372#endif
7373#endif
7374
7375#ifdef __OPTIMIZE__
7376extern __inline unsigned
7377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7378_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7379{
7380 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7381}
7382
7383extern __inline int
7384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7385_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7386{
7387 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7388}
7389
7390extern __inline int
7391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7393{
7394 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7395}
7396
7397extern __inline unsigned
7398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7399_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7400{
7401 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7402}
7403
7404extern __inline int
7405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7406_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7407{
7408 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7409}
7410
7411extern __inline int
7412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7414{
7415 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7416}
7417#else
/* Macro forms of the double -> 32-bit integer conversions for builds
   without __OPTIMIZE__.  A is the __m128d source, B the immediate.  */

/* Conversions that honour the rounding control in B.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
#define _mm_cvt_roundsd_si32(A, B) \
  ((int)__builtin_ia32_vcvtsd2si32(A, B))
#define _mm_cvt_roundsd_i32(A, B) \
  ((int)__builtin_ia32_vcvtsd2si32(A, B))

/* Truncating conversions.  */
#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
#define _mm_cvtt_roundsd_si32(A, B) \
  ((int)__builtin_ia32_vcvttsd2si32(A, B))
#define _mm_cvtt_roundsd_i32(A, B) \
  ((int)__builtin_ia32_vcvttsd2si32(A, B))
7435#endif
7436
7437extern __inline __m512d
7438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7439_mm512_movedup_pd (__m512d __A)
7440{
7441 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7442 (__v8df)
7443 _mm512_setzero_pd (),
7444 (__mmask8) -1);
7445}
7446
7447extern __inline __m512d
7448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7449_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7450{
7451 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7452 (__v8df) __W,
7453 (__mmask8) __U);
7454}
7455
7456extern __inline __m512d
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7459{
7460 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7461 (__v8df)
7462 _mm512_setzero_pd (),
7463 (__mmask8) __U);
7464}
7465
7466extern __inline __m512d
7467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7468_mm512_unpacklo_pd (__m512d __A, __m512d __B)
7469{
7470 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7471 (__v8df) __B,
7472 (__v8df)
7473 _mm512_setzero_pd (),
7474 (__mmask8) -1);
7475}
7476
7477extern __inline __m512d
7478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7480{
7481 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7482 (__v8df) __B,
7483 (__v8df) __W,
7484 (__mmask8) __U);
7485}
7486
7487extern __inline __m512d
7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7490{
7491 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7492 (__v8df) __B,
7493 (__v8df)
7494 _mm512_setzero_pd (),
7495 (__mmask8) __U);
7496}
7497
7498extern __inline __m512d
7499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7500_mm512_unpackhi_pd (__m512d __A, __m512d __B)
7501{
7502 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7503 (__v8df) __B,
7504 (__v8df)
7505 _mm512_setzero_pd (),
7506 (__mmask8) -1);
7507}
7508
7509extern __inline __m512d
7510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7512{
7513 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7514 (__v8df) __B,
7515 (__v8df) __W,
7516 (__mmask8) __U);
7517}
7518
7519extern __inline __m512d
7520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7521_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7522{
7523 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7524 (__v8df) __B,
7525 (__v8df)
7526 _mm512_setzero_pd (),
7527 (__mmask8) __U);
7528}
7529
7530extern __inline __m512
7531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532_mm512_unpackhi_ps (__m512 __A, __m512 __B)
7533{
7534 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7535 (__v16sf) __B,
7536 (__v16sf)
7537 _mm512_setzero_ps (),
7538 (__mmask16) -1);
7539}
7540
7541extern __inline __m512
7542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7544{
7545 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7546 (__v16sf) __B,
7547 (__v16sf) __W,
7548 (__mmask16) __U);
7549}
7550
7551extern __inline __m512
7552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7554{
7555 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7556 (__v16sf) __B,
7557 (__v16sf)
7558 _mm512_setzero_ps (),
7559 (__mmask16) __U);
7560}
7561
7562#ifdef __OPTIMIZE__
7563extern __inline __m512d
7564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565_mm512_cvt_roundps_pd (__m256 __A, const int __R)
7566{
7567 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7568 (__v8df)
7569 _mm512_setzero_pd (),
7570 (__mmask8) -1, __R);
7571}
7572
7573extern __inline __m512d
7574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7575_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7576 const int __R)
7577{
7578 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7579 (__v8df) __W,
7580 (__mmask8) __U, __R);
7581}
7582
7583extern __inline __m512d
7584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7585_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7586{
7587 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7588 (__v8df)
7589 _mm512_setzero_pd (),
7590 (__mmask8) __U, __R);
7591}
7592
7593extern __inline __m512
7594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7595_mm512_cvt_roundph_ps (__m256i __A, const int __R)
7596{
7597 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7598 (__v16sf)
7599 _mm512_setzero_ps (),
7600 (__mmask16) -1, __R);
7601}
7602
7603extern __inline __m512
7604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7605_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7606 const int __R)
7607{
7608 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7609 (__v16sf) __W,
7610 (__mmask16) __U, __R);
7611}
7612
7613extern __inline __m512
7614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7616{
7617 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7618 (__v16sf)
7619 _mm512_setzero_ps (),
7620 (__mmask16) __U, __R);
7621}
7622
7623extern __inline __m256i
7624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625_mm512_cvt_roundps_ph (__m512 __A, const int __I)
7626{
7627 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7628 __I,
7629 (__v16hi)
7630 _mm256_setzero_si256 (),
7631 -1);
7632}
7633
7634extern __inline __m256i
7635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7636_mm512_cvtps_ph (__m512 __A, const int __I)
7637{
7638 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7639 __I,
7640 (__v16hi)
7641 _mm256_setzero_si256 (),
7642 -1);
7643}
7644
7645extern __inline __m256i
7646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7647_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7648 const int __I)
7649{
7650 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7651 __I,
7652 (__v16hi) __U,
7653 (__mmask16) __W);
7654}
7655
7656extern __inline __m256i
7657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7658_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7659{
7660 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7661 __I,
7662 (__v16hi) __U,
7663 (__mmask16) __W);
7664}
7665
7666extern __inline __m256i
7667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7668_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7669{
7670 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7671 __I,
7672 (__v16hi)
7673 _mm256_setzero_si256 (),
7674 (__mmask16) __W);
7675}
7676
7677extern __inline __m256i
7678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7679_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7680{
7681 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7682 __I,
7683 (__v16hi)
7684 _mm256_setzero_si256 (),
7685 (__mmask16) __W);
7686}
7687#else
/* Macro forms of the ps<->pd / ph<->ps conversion intrinsics for builds
   without __OPTIMIZE__.  Each expansion is wrapped in parentheses and
   each argument is parenthesised before being cast, so the macros can be
   used inside larger expressions and with non-trivial argument
   expressions; the casts mirror those of the inline functions.  */

/* float -> double.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)(-1), (int)(B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
    (__v8df)(__m512d)(W), (__mmask8)(U), (int)(B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (int)(B)))

/* half -> float.  */
#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)(-1), (int)(B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
    (__v16sf)(__m512)(W), (__mmask16)(U), (int)(B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (int)(B)))

/* float -> half.  In the mask_ forms U is the pass-through vector and W
   the write mask, matching the inline functions' parameter order.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(-1)))
#define _mm512_cvtps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(-1)))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)(__m256i)(U), (__mmask16)(W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)(__m256i)(U), (__mmask16)(W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), \
    (int)(I), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(W)))
7724#endif
7725
7726#ifdef __OPTIMIZE__
7727extern __inline __m256
7728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7729_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7730{
7731 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7732 (__v8sf)
7733 _mm256_setzero_ps (),
7734 (__mmask8) -1, __R);
7735}
7736
7737extern __inline __m256
7738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7739_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7740 const int __R)
7741{
7742 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7743 (__v8sf) __W,
7744 (__mmask8) __U, __R);
7745}
7746
7747extern __inline __m256
7748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7749_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7750{
7751 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7752 (__v8sf)
7753 _mm256_setzero_ps (),
7754 (__mmask8) __U, __R);
7755}
7756
075691af
AI
7757extern __inline __m128
7758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7760{
7761 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7762 (__v2df) __B,
7763 __R);
7764}
7765
7766extern __inline __m128d
7767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7768_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7769{
7770 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7771 (__v4sf) __B,
7772 __R);
7773}
756c5857
AI
7774#else
/* Macro forms of the cvt_roundpd_ps wrappers, used when __OPTIMIZE__ is
   not defined.  The whole expansion is parenthesized so it behaves as a
   single primary expression (e.g. when followed by a subscript), and the
   operands carry the same casts as the inline-function versions.  */
#define _mm512_cvt_roundpd_ps(A, B)                                         \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),          \
    (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)                              \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),          \
    (__v8sf)(__m256)(W), (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)                                \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),          \
    (__v8sf) _mm256_setzero_ps (), (__mmask8)(U), (B)))
075691af
AI
7783
/* Macro forms of the scalar cvt_round wrappers, used when __OPTIMIZE__ is
   not defined.  The expansion is fully parenthesized and the operands are
   cast exactly as in the inline-function versions.  */
#define _mm_cvt_roundsd_ss(A, B, C)                                         \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A),             \
    (__v2df)(__m128d)(B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)                                         \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A),           \
    (__v4sf)(__m128)(B), (C)))
756c5857
AI
7789#endif
7790
7791extern __inline void
7792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793_mm512_stream_si512 (__m512i * __P, __m512i __A)
7794{
7795 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
7796}
7797
7798extern __inline void
7799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7800_mm512_stream_ps (float *__P, __m512 __A)
7801{
7802 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
7803}
7804
7805extern __inline void
7806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7807_mm512_stream_pd (double *__P, __m512d __A)
7808{
7809 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
7810}
7811
c56a42b9
KY
7812extern __inline __m512i
7813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7814_mm512_stream_load_si512 (void *__P)
7815{
7816 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
7817}
7818
756c5857 7819#ifdef __OPTIMIZE__
075691af
AI
7820extern __inline __m128
7821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7822_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
7823{
7824 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
7825 (__v4sf) __B,
7826 __R);
7827}
7828
7829extern __inline __m128d
7830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7831_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
7832{
7833 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
7834 (__v2df) __B,
7835 __R);
7836}
7837
756c5857
AI
7838extern __inline __m512
7839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840_mm512_getexp_round_ps (__m512 __A, const int __R)
7841{
7842 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7843 (__v16sf)
7844 _mm512_setzero_ps (),
7845 (__mmask16) -1, __R);
7846}
7847
7848extern __inline __m512
7849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7850_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
7851 const int __R)
7852{
7853 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7854 (__v16sf) __W,
7855 (__mmask16) __U, __R);
7856}
7857
7858extern __inline __m512
7859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7860_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
7861{
7862 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7863 (__v16sf)
7864 _mm512_setzero_ps (),
7865 (__mmask16) __U, __R);
7866}
7867
7868extern __inline __m512d
7869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870_mm512_getexp_round_pd (__m512d __A, const int __R)
7871{
7872 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7873 (__v8df)
7874 _mm512_setzero_pd (),
7875 (__mmask8) -1, __R);
7876}
7877
7878extern __inline __m512d
7879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
7881 const int __R)
7882{
7883 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7884 (__v8df) __W,
7885 (__mmask8) __U, __R);
7886}
7887
7888extern __inline __m512d
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
7891{
7892 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7893 (__v8df)
7894 _mm512_setzero_pd (),
7895 (__mmask8) __U, __R);
7896}
7897
/* Constants for mantissa extraction: normalization interval selector.
   The enumerators take the values 0 through 3 in declaration order.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,     /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,    /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,    /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
7906
/* Constants for mantissa extraction: sign-handling selector.
   The enumerators take the values 0 through 2 in declaration order.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,   /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0 */
  _MM_MANT_SIGN_nan = 2    /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
7913
7914extern __inline __m512d
7915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7916_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
7917 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7918{
7919 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
7920 (__C << 2) | __B,
7921 _mm512_setzero_pd (),
7922 (__mmask8) -1, __R);
7923}
7924
7925extern __inline __m512d
7926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7927_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
7928 _MM_MANTISSA_NORM_ENUM __B,
7929 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7930{
7931 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
7932 (__C << 2) | __B,
7933 (__v8df) __W, __U,
7934 __R);
7935}
7936
7937extern __inline __m512d
7938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7939_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
7940 _MM_MANTISSA_NORM_ENUM __B,
7941 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7942{
7943 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
7944 (__C << 2) | __B,
7945 (__v8df)
7946 _mm512_setzero_pd (),
7947 __U, __R);
7948}
7949
7950extern __inline __m512
7951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7952_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
7953 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7954{
7955 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
7956 (__C << 2) | __B,
7957 _mm512_setzero_ps (),
7958 (__mmask16) -1, __R);
7959}
7960
7961extern __inline __m512
7962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7963_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
7964 _MM_MANTISSA_NORM_ENUM __B,
7965 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7966{
7967 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
7968 (__C << 2) | __B,
7969 (__v16sf) __W, __U,
7970 __R);
7971}
7972
7973extern __inline __m512
7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
7976 _MM_MANTISSA_NORM_ENUM __B,
7977 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7978{
7979 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
7980 (__C << 2) | __B,
7981 (__v16sf)
7982 _mm512_setzero_ps (),
7983 __U, __R);
7984}
7985
075691af
AI
7986extern __inline __m128d
7987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7988_mm_getmant_round_sd (__m128d __A, __m128d __B,
7989 _MM_MANTISSA_NORM_ENUM __C,
7990 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
7991{
7992 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
7993 (__v2df) __B,
7994 (__D << 2) | __C,
7995 __R);
7996}
7997
7998extern __inline __m128
7999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000_mm_getmant_round_ss (__m128 __A, __m128 __B,
8001 _MM_MANTISSA_NORM_ENUM __C,
8002 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8003{
8004 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8005 (__v4sf) __B,
8006 (__D << 2) | __C,
8007 __R);
8008}
8009
756c5857
AI
8010#else
/* Macro forms of the 512-bit getmant wrappers, used when __OPTIMIZE__ is
   not defined.  The builtin's immediate is ((C) << 2) | (B); the unmasked
   forms pass an all-ones mask, and the unmasked and maskz forms pass a
   zero vector as the pass-through operand.  */
#define _mm512_getmant_round_pd(X, B, C, R)                                 \
  ((__m512d)__builtin_ia32_getmantpd512_mask (                              \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),                           \
     (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)-1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)                      \
  ((__m512d)__builtin_ia32_getmantpd512_mask (                              \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),                           \
     (__v8df)(__m512d)(W), (__mmask8)(U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R)                        \
  ((__m512d)__builtin_ia32_getmantpd512_mask (                              \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),                           \
     (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U), (R)))

#define _mm512_getmant_round_ps(X, B, C, R)                                 \
  ((__m512)__builtin_ia32_getmantps512_mask (                               \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),                           \
     (__v16sf)(__m512)_mm512_setzero_ps(), (__mmask16)-1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)                      \
  ((__m512)__builtin_ia32_getmantps512_mask (                               \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),                           \
     (__v16sf)(__m512)(W), (__mmask16)(U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R)                        \
  ((__m512)__builtin_ia32_getmantps512_mask (                               \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),                           \
     (__v16sf)(__m512)_mm512_setzero_ps(), (__mmask16)(U), (R)))
075691af
AI
/* Macro forms of the scalar getmant and getexp wrappers, used when
   __OPTIMIZE__ is not defined.  For getmant the builtin's immediate is
   ((D) << 2) | (C).  */
#define _mm_getmant_round_sd(X, Y, C, D, R)                                 \
  ((__m128d)__builtin_ia32_getmantsd_round (                                \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y),                            \
     (int)(((D)<<2) | (C)), (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R)                                 \
  ((__m128)__builtin_ia32_getmantss_round (                                 \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),                              \
     (int)(((D)<<2) | (C)), (R)))

#define _mm_getexp_round_ss(A, B, R)                                        \
  ((__m128)__builtin_ia32_getexpss128_round(                                \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))

#define _mm_getexp_round_sd(A, B, R)                                        \
  ((__m128d)__builtin_ia32_getexpsd128_round(                               \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8068
756c5857
AI
/* Macro forms of the 512-bit getexp wrappers, used when __OPTIMIZE__ is
   not defined.  Operand order for the builtin is (source, pass-through,
   mask, R).  */
#define _mm512_getexp_round_ps(A, R)                                        \
  ((__m512)__builtin_ia32_getexpps512_mask(                                 \
     (__v16sf)(__m512)(A), (__v16sf)_mm512_setzero_ps(),                    \
     (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R)                             \
  ((__m512)__builtin_ia32_getexpps512_mask(                                 \
     (__v16sf)(__m512)(A), (__v16sf)(__m512)(W),                            \
     (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R)                               \
  ((__m512)__builtin_ia32_getexpps512_mask(                                 \
     (__v16sf)(__m512)(A), (__v16sf)_mm512_setzero_ps(),                    \
     (__mmask16)(U), R))

#define _mm512_getexp_round_pd(A, R)                                        \
  ((__m512d)__builtin_ia32_getexppd512_mask(                                \
     (__v8df)(__m512d)(A), (__v8df)_mm512_setzero_pd(),                     \
     (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R)                             \
  ((__m512d)__builtin_ia32_getexppd512_mask(                                \
     (__v8df)(__m512d)(A), (__v8df)(__m512d)(W),                            \
     (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R)                               \
  ((__m512d)__builtin_ia32_getexppd512_mask(                                \
     (__v8df)(__m512d)(A), (__v8df)_mm512_setzero_pd(),                     \
     (__mmask8)(U), R))
8092#endif
8093
8094#ifdef __OPTIMIZE__
8095extern __inline __m512
8096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8097_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8098{
8099 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8100 (__v16sf) __A, -1, __R);
8101}
8102
8103extern __inline __m512
8104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8105_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8106 const int __imm, const int __R)
8107{
8108 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8109 (__v16sf) __A,
8110 (__mmask16) __B, __R);
8111}
8112
8113extern __inline __m512
8114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8115_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8116 const int __imm, const int __R)
8117{
8118 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8119 __imm,
8120 (__v16sf)
8121 _mm512_setzero_ps (),
8122 (__mmask16) __A, __R);
8123}
8124
8125extern __inline __m512d
8126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8128{
8129 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8130 (__v8df) __A, -1, __R);
8131}
8132
8133extern __inline __m512d
8134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8135_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8136 __m512d __C, const int __imm, const int __R)
8137{
8138 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8139 (__v8df) __A,
8140 (__mmask8) __B, __R);
8141}
8142
8143extern __inline __m512d
8144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8145_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8146 const int __imm, const int __R)
8147{
8148 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8149 __imm,
8150 (__v8df)
8151 _mm512_setzero_pd (),
8152 (__mmask8) __A, __R);
8153}
075691af
AI
8154
8155extern __inline __m128
8156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8157_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8158{
8159 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8160 (__v4sf) __B, __imm, __R);
8161}
8162
8163extern __inline __m128d
8164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8165_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8166 const int __R)
8167{
8168 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8169 (__v2df) __B, __imm, __R);
8170}
8171
756c5857
AI
8172#else
/* Macro forms of the 512-bit roundscale wrappers, used when __OPTIMIZE__
   is not defined.  Operand order for the builtin is (source, immediate,
   pass-through, mask, R).
   The unmasked forms pass a zero vector as the pass-through operand
   instead of a second copy of A: the mask is all ones, so the
   pass-through value is never selected, and A is now expanded exactly
   once, which keeps arguments with side effects from being evaluated
   twice.  */
#define _mm512_roundscale_round_ps(A, B, R)                                 \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B), \
    (__v16sf)_mm512_setzero_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)                      \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),           \
                                            (int)(D),                       \
                                            (__v16sf)(__m512)(A),           \
                                            (__mmask16)(B), R))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)                        \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),           \
                                            (int)(C),                       \
                                            (__v16sf)_mm512_setzero_ps(),   \
                                            (__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R)                                 \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_setzero_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)                      \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),          \
                                             (int)(D),                      \
                                             (__v8df)(__m512d)(A),          \
                                             (__mmask8)(B), R))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)                        \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),          \
                                             (int)(C),                      \
                                             (__v8df)_mm512_setzero_pd(),   \
                                             (__mmask8)(A), R))
075691af
AI
/* Macro forms of the scalar roundscale wrappers, used when __OPTIMIZE__
   is not defined.  Operand order for the builtin is (A, B, C, R).  */
#define _mm_roundscale_round_ss(A, B, C, R)                                 \
  ((__m128) __builtin_ia32_rndscaless_round (                               \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), R))
#define _mm_roundscale_round_sd(A, B, C, R)                                 \
  ((__m128d) __builtin_ia32_rndscalesd_round (                              \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), R))
756c5857
AI
8205#endif
8206
8207extern __inline __m512
8208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8209_mm512_floor_ps (__m512 __A)
8210{
8211 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8212 _MM_FROUND_FLOOR,
8213 (__v16sf) __A, -1,
8214 _MM_FROUND_CUR_DIRECTION);
8215}
8216
8217extern __inline __m512d
8218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8219_mm512_floor_pd (__m512d __A)
8220{
8221 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8222 _MM_FROUND_FLOOR,
8223 (__v8df) __A, -1,
8224 _MM_FROUND_CUR_DIRECTION);
8225}
8226
8227extern __inline __m512
8228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8229_mm512_ceil_ps (__m512 __A)
8230{
8231 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8232 _MM_FROUND_CEIL,
8233 (__v16sf) __A, -1,
8234 _MM_FROUND_CUR_DIRECTION);
8235}
8236
8237extern __inline __m512d
8238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8239_mm512_ceil_pd (__m512d __A)
8240{
8241 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8242 _MM_FROUND_CEIL,
8243 (__v8df) __A, -1,
8244 _MM_FROUND_CUR_DIRECTION);
8245}
8246
8247extern __inline __m512
8248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8250{
8251 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8252 _MM_FROUND_FLOOR,
8253 (__v16sf) __W, __U,
8254 _MM_FROUND_CUR_DIRECTION);
8255}
8256
8257extern __inline __m512d
8258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8259_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8260{
8261 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8262 _MM_FROUND_FLOOR,
8263 (__v8df) __W, __U,
8264 _MM_FROUND_CUR_DIRECTION);
8265}
8266
8267extern __inline __m512
8268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8269_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8270{
8271 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8272 _MM_FROUND_CEIL,
8273 (__v16sf) __W, __U,
8274 _MM_FROUND_CUR_DIRECTION);
8275}
8276
8277extern __inline __m512d
8278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8279_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8280{
8281 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8282 _MM_FROUND_CEIL,
8283 (__v8df) __W, __U,
8284 _MM_FROUND_CUR_DIRECTION);
8285}
8286
8287extern __inline __m512
8288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8289_mm512_maskz_floor_ps (__mmask16 __U, __m512 __A)
8290{
8291 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8292 _MM_FROUND_FLOOR,
8293 (__v16sf)
8294 _mm512_setzero_ps (),
8295 __U,
8296 _MM_FROUND_CUR_DIRECTION);
8297}
8298
8299extern __inline __m512d
8300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8301_mm512_maskz_floor_pd (__mmask8 __U, __m512d __A)
8302{
8303 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8304 _MM_FROUND_FLOOR,
8305 (__v8df)
8306 _mm512_setzero_pd (),
8307 __U,
8308 _MM_FROUND_CUR_DIRECTION);
8309}
8310
8311extern __inline __m512
8312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313_mm512_maskz_ceil_ps (__mmask16 __U, __m512 __A)
8314{
8315 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8316 _MM_FROUND_CEIL,
8317 (__v16sf)
8318 _mm512_setzero_ps (),
8319 __U,
8320 _MM_FROUND_CUR_DIRECTION);
8321}
8322
8323extern __inline __m512d
8324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8325_mm512_maskz_ceil_pd (__mmask8 __U, __m512d __A)
8326{
8327 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8328 _MM_FROUND_CEIL,
8329 (__v8df)
8330 _mm512_setzero_pd (),
8331 __U,
8332 _MM_FROUND_CUR_DIRECTION);
8333}
8334
8335#ifdef __OPTIMIZE__
8336extern __inline __m512
8337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8338_mm512_floor_round_ps (__m512 __A, const int __R)
8339{
8340 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8341 _MM_FROUND_FLOOR,
8342 (__v16sf) __A, -1, __R);
8343}
8344
8345extern __inline __m512d
8346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8347_mm512_floor_round_pd (__m512d __A, const int __R)
8348{
8349 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8350 _MM_FROUND_FLOOR,
8351 (__v8df) __A, -1, __R);
8352}
8353
8354extern __inline __m512
8355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356_mm512_ceil_round_ps (__m512 __A, const int __R)
8357{
8358 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8359 _MM_FROUND_CEIL,
8360 (__v16sf) __A, -1, __R);
8361}
8362
8363extern __inline __m512d
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm512_ceil_round_pd (__m512d __A, const int __R)
8366{
8367 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8368 _MM_FROUND_CEIL,
8369 (__v8df) __A, -1, __R);
8370}
8371
8372extern __inline __m512
8373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8374_mm512_mask_floor_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8375 const int __R)
8376{
8377 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8378 _MM_FROUND_FLOOR,
8379 (__v16sf) __W, __U, __R);
8380}
8381
8382extern __inline __m512d
8383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8384_mm512_mask_floor_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8385 const int __R)
8386{
8387 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8388 _MM_FROUND_FLOOR,
8389 (__v8df) __W, __U, __R);
8390}
8391
8392extern __inline __m512
8393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8394_mm512_mask_ceil_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
8395{
8396 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8397 _MM_FROUND_CEIL,
8398 (__v16sf) __W, __U, __R);
8399}
8400
8401extern __inline __m512d
8402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8403_mm512_mask_ceil_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8404 const int __R)
8405{
8406 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8407 _MM_FROUND_CEIL,
8408 (__v8df) __W, __U, __R);
8409}
8410
8411extern __inline __m512
8412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8413_mm512_maskz_floor_round_ps (__mmask16 __U, __m512 __A, const int __R)
8414{
8415 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8416 _MM_FROUND_FLOOR,
8417 (__v16sf)
8418 _mm512_setzero_ps (),
8419 __U, __R);
8420}
8421
8422extern __inline __m512d
8423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424_mm512_maskz_floor_round_pd (__mmask8 __U, __m512d __A, const int __R)
8425{
8426 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8427 _MM_FROUND_FLOOR,
8428 (__v8df)
8429 _mm512_setzero_pd (),
8430 __U, __R);
8431}
8432
8433extern __inline __m512
8434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8435_mm512_maskz_ceil_round_ps (__mmask16 __U, __m512 __A, const int __R)
8436{
8437 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8438 _MM_FROUND_CEIL,
8439 (__v16sf)
8440 _mm512_setzero_ps (),
8441 __U, __R);
8442}
8443
8444extern __inline __m512d
8445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8446_mm512_maskz_ceil_round_pd (__mmask8 __U, __m512d __A, const int __R)
8447{
8448 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8449 _MM_FROUND_CEIL,
8450 (__v8df)
8451 _mm512_setzero_pd (),
8452 __U, __R);
8453}
8454
8455extern __inline __m512i
8456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8457_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8458{
8459 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8460 (__v16si) __B, __imm,
8461 (__v16si)
8462 _mm512_setzero_si512 (),
8463 (__mmask16) -1);
8464}
8465
8466extern __inline __m512i
8467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8468_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8469 __m512i __B, const int __imm)
8470{
8471 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8472 (__v16si) __B, __imm,
8473 (__v16si) __W,
8474 (__mmask16) __U);
8475}
8476
8477extern __inline __m512i
8478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8480 const int __imm)
8481{
8482 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8483 (__v16si) __B, __imm,
8484 (__v16si)
8485 _mm512_setzero_si512 (),
8486 (__mmask16) __U);
8487}
8488
8489extern __inline __m512i
8490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8492{
8493 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8494 (__v8di) __B, __imm,
8495 (__v8di)
8496 _mm512_setzero_si512 (),
8497 (__mmask8) -1);
8498}
8499
8500extern __inline __m512i
8501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8503 __m512i __B, const int __imm)
8504{
8505 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8506 (__v8di) __B, __imm,
8507 (__v8di) __W,
8508 (__mmask8) __U);
8509}
8510
8511extern __inline __m512i
8512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8514 const int __imm)
8515{
8516 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8517 (__v8di) __B, __imm,
8518 (__v8di)
8519 _mm512_setzero_si512 (),
8520 (__mmask8) __U);
8521}
8522#else
/* Macro forms of the floor/ceil "round" wrappers, used when __OPTIMIZE__
   is not defined.  Operand order for the builtin is (source, immediate,
   pass-through, mask, R), with _MM_FROUND_FLOOR or _MM_FROUND_CEIL as the
   immediate.
   The unmasked forms pass a zero vector as the pass-through operand
   instead of a second copy of A: the mask is all ones, so the
   pass-through value is never selected, and A is now expanded exactly
   once, which keeps arguments with side effects from being evaluated
   twice.  */
#define _mm512_floor_round_ps(A, R)                                         \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),           \
                                            _MM_FROUND_FLOOR,               \
                                            (__v16sf)_mm512_setzero_ps(),   \
                                            (__mmask16)(-1), R))
#define _mm512_mask_floor_round_ps(A, B, C, R)                              \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),           \
                                            _MM_FROUND_FLOOR,               \
                                            (__v16sf)(__m512)(A),           \
                                            (__mmask16)(B), R))
#define _mm512_maskz_floor_round_ps(A, B, R)                                \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),           \
                                            _MM_FROUND_FLOOR,               \
                                            (__v16sf)_mm512_setzero_ps(),   \
                                            (__mmask16)(A), R))
#define _mm512_floor_round_pd(A, R)                                         \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),          \
                                             _MM_FROUND_FLOOR,              \
                                             (__v8df)_mm512_setzero_pd(),   \
                                             (__mmask8)(-1), R))
#define _mm512_mask_floor_round_pd(A, B, C, R)                              \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),          \
                                             _MM_FROUND_FLOOR,              \
                                             (__v8df)(__m512d)(A),          \
                                             (__mmask8)(B), R))
#define _mm512_maskz_floor_round_pd(A, B, R)                                \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),          \
                                             _MM_FROUND_FLOOR,              \
                                             (__v8df)_mm512_setzero_pd(),   \
                                             (__mmask8)(A), R))
#define _mm512_ceil_round_ps(A, R)                                          \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),           \
                                            _MM_FROUND_CEIL,                \
                                            (__v16sf)_mm512_setzero_ps(),   \
                                            (__mmask16)(-1), R))
#define _mm512_mask_ceil_round_ps(A, B, C, R)                               \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),           \
                                            _MM_FROUND_CEIL,                \
                                            (__v16sf)(__m512)(A),           \
                                            (__mmask16)(B), R))
#define _mm512_maskz_ceil_round_ps(A, B, R)                                 \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),           \
                                            _MM_FROUND_CEIL,                \
                                            (__v16sf)_mm512_setzero_ps(),   \
                                            (__mmask16)(A), R))
#define _mm512_ceil_round_pd(A, R)                                          \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),          \
                                             _MM_FROUND_CEIL,               \
                                             (__v8df)_mm512_setzero_pd(),   \
                                             (__mmask8)(-1), R))
#define _mm512_mask_ceil_round_pd(A, B, C, R)                               \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),          \
                                             _MM_FROUND_CEIL,               \
                                             (__v8df)(__m512d)(A),          \
                                             (__mmask8)(B), R))
#define _mm512_maskz_ceil_round_pd(A, B, R)                                 \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),          \
                                             _MM_FROUND_CEIL,               \
                                             (__v8df)_mm512_setzero_pd(),   \
                                             (__mmask8)(A), R))
8583
/* Macro forms of the alignr wrappers, used when __OPTIMIZE__ is not
   defined.  Operand order for the builtin is (X, Y, immediate,
   pass-through, mask).
   The unmasked forms pass a zero vector as the pass-through operand, as
   the inline-function versions do, rather than a second copy of X.  The
   mask is all ones, so the pass-through value is never selected, and X is
   now expanded exactly once, which keeps arguments with side effects from
   being evaluated twice.  */
#define _mm512_alignr_epi32(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C),                                    \
        (__v16si)(__m512i)_mm512_setzero_si512 (),                          \
        (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
        (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C),                                    \
        (__v16si)(__m512i)_mm512_setzero_si512 (),                          \
        (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C),                                     \
        (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
        (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W),               \
        (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C),                                     \
        (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
        (__mmask8)(U)))
8611#endif
8612
8613extern __inline __mmask16
8614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8615_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8616{
8617 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8618 (__v16si) __B,
8619 (__mmask16) -1);
8620}
8621
8622extern __inline __mmask16
8623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8624_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8625{
8626 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8627 (__v16si) __B, __U);
8628}
8629
8630extern __inline __mmask8
8631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8632_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8633{
8634 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8635 (__v8di) __B, __U);
8636}
8637
8638extern __inline __mmask8
8639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8641{
8642 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8643 (__v8di) __B,
8644 (__mmask8) -1);
8645}
8646
8647extern __inline __mmask16
8648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8649_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8650{
8651 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8652 (__v16si) __B,
8653 (__mmask16) -1);
8654}
8655
8656extern __inline __mmask16
8657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8658_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8659{
8660 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8661 (__v16si) __B, __U);
8662}
8663
8664extern __inline __mmask8
8665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8666_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8667{
8668 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8669 (__v8di) __B, __U);
8670}
8671
8672extern __inline __mmask8
8673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8674_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8675{
8676 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8677 (__v8di) __B,
8678 (__mmask8) -1);
8679}
8680
/* Predicate codes for the _mm512_cmp_ep[iu]{32,64}_mask family (passed as
   the __P immediate).  _MM_CMPINT_NLT and _MM_CMPINT_GE share the value
   0x5; _MM_CMPINT_NLE and _MM_CMPINT_GT share the value 0x6.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
8690
8691#ifdef __OPTIMIZE__
8692extern __inline __mmask8
8693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8695{
8696 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8697 (__v8di) __Y, __P,
8698 (__mmask8) -1);
8699}
8700
8701extern __inline __mmask16
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8704{
8705 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8706 (__v16si) __Y, __P,
8707 (__mmask16) -1);
8708}
8709
8710extern __inline __mmask8
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8713{
8714 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8715 (__v8di) __Y, __P,
8716 (__mmask8) -1);
8717}
8718
8719extern __inline __mmask16
8720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
8722{
8723 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8724 (__v16si) __Y, __P,
8725 (__mmask16) -1);
8726}
8727
8728extern __inline __mmask8
8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
8731 const int __R)
8732{
8733 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
8734 (__v8df) __Y, __P,
8735 (__mmask8) -1, __R);
8736}
8737
8738extern __inline __mmask16
8739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
8741{
8742 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
8743 (__v16sf) __Y, __P,
8744 (__mmask16) -1, __R);
8745}
8746
8747extern __inline __mmask8
8748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
8750 const int __P)
8751{
8752 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8753 (__v8di) __Y, __P,
8754 (__mmask8) __U);
8755}
8756
8757extern __inline __mmask16
8758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8759_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
8760 const int __P)
8761{
8762 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8763 (__v16si) __Y, __P,
8764 (__mmask16) __U);
8765}
8766
8767extern __inline __mmask8
8768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
8770 const int __P)
8771{
8772 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8773 (__v8di) __Y, __P,
8774 (__mmask8) __U);
8775}
8776
8777extern __inline __mmask16
8778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8779_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
8780 const int __P)
8781{
8782 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8783 (__v16si) __Y, __P,
8784 (__mmask16) __U);
8785}
8786
8787extern __inline __mmask8
8788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
8790 const int __P, const int __R)
8791{
8792 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
8793 (__v8df) __Y, __P,
8794 (__mmask8) __U, __R);
8795}
8796
8797extern __inline __mmask16
8798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8799_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
8800 const int __P, const int __R)
8801{
8802 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
8803 (__v16sf) __Y, __P,
8804 (__mmask16) __U, __R);
8805}
8806
8807extern __inline __mmask8
8808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8809_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
8810{
8811 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
8812 (__v2df) __Y, __P,
8813 (__mmask8) -1, __R);
8814}
8815
8816extern __inline __mmask8
8817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8818_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
8819 const int __P, const int __R)
8820{
8821 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
8822 (__v2df) __Y, __P,
8823 (__mmask8) __M, __R);
8824}
8825
8826extern __inline __mmask8
8827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
8829{
8830 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
8831 (__v4sf) __Y, __P,
8832 (__mmask8) -1, __R);
8833}
8834
8835extern __inline __mmask8
8836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
8838 const int __P, const int __R)
8839{
8840 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
8841 (__v4sf) __Y, __P,
8842 (__mmask8) __M, __R);
8843}
8844
8845#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpq512
   builtin with an all-ones 8-bit mask and an __mmask8 result.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask (                     \
     (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(P),      \
     (__mmask8)-1))
8850
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpd512
   builtin with an all-ones 16-bit mask.  The result is kept as __mmask16,
   matching the inline-function form; casting it to __mmask8 would discard
   the upper eight result bits.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask (                    \
     (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(P),    \
     (__mmask16)-1))
8855
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   ucmpq512 builtin with an all-ones 8-bit mask and an __mmask8 result.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask (                    \
     (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(P),      \
     (__mmask8)-1))
8860
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   ucmpd512 builtin with an all-ones 16-bit mask.  The result is kept as
   __mmask16, matching the inline-function form; casting it to __mmask8
   would discard the upper eight result bits.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask (                   \
     (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(P),    \
     (__mmask16)-1))
8865
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   cmppd512 builtin with an all-ones 8-bit mask; R is passed through as the
   last operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask (                    \
     (__v8df)(__m512d)(X), (__v8df)(__m512d)(Y), (int)(P),      \
     (__mmask8)-1, (R)))
8870
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   cmpps512 builtin with an all-ones 16-bit mask; R is passed through as
   the last operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask (                   \
     (__v16sf)(__m512)(X), (__v16sf)(__m512)(Y), (int)(P),      \
     (__mmask16)-1, (R)))
8875
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpq512
   builtin with M as the mask operand.  M is parenthesized so the __mmask8
   cast applies to the whole argument expression rather than only to its
   first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask (                     \
     (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(P),      \
     (__mmask8)(M)))
8880
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpd512
   builtin with M as the 16-bit mask operand.  The result is kept as
   __mmask16 (matching the inline-function form) so the upper eight bits
   survive, and M is parenthesized so the cast covers the whole
   argument.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask (                    \
     (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(P),    \
     (__mmask16)(M)))
8885
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   ucmpq512 builtin with M as the mask operand.  M is parenthesized so the
   __mmask8 cast applies to the whole argument expression.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask (                    \
     (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(P),      \
     (__mmask8)(M)))
8890
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   ucmpd512 builtin with M as the 16-bit mask operand.  The result is kept
   as __mmask16 (matching the inline-function form) so the upper eight bits
   survive, and M is parenthesized so the cast covers the whole
   argument.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask (                   \
     (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(P),    \
     (__mmask16)(M)))
8895
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   cmppd512 builtin with M as the mask operand and R as the last operand.
   M is parenthesized so the __mmask8 cast applies to the whole argument
   expression.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)            \
  ((__mmask8) __builtin_ia32_cmppd512_mask (                    \
     (__v8df)(__m512d)(X), (__v8df)(__m512d)(Y), (int)(P),      \
     (__mmask8)(M), (R)))
8900
/* Macro form used when __OPTIMIZE__ is not defined: expands to the
   cmpps512 builtin with M as the 16-bit mask operand and R as the last
   operand.  M is parenthesized so the __mmask16 cast applies to the whole
   argument expression.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)            \
  ((__mmask16) __builtin_ia32_cmpps512_mask (                   \
     (__v16sf)(__m512)(X), (__v16sf)(__m512)(Y), (int)(P),      \
     (__mmask16)(M), (R)))
8905
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpsd
   builtin with (__mmask8)-1 as the mask operand and R as the last
   operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask (                       \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P),      \
     (__mmask8)-1, (R)))
8910
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpsd
   builtin with M as the mask operand and R as the last operand.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)               \
  ((__mmask8) __builtin_ia32_cmpsd_mask (                       \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P),      \
     (M), (R)))
8915
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpss
   builtin with (__mmask8)-1 as the mask operand and R as the last
   operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask (                       \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P),        \
     (__mmask8)-1, (R)))
8920
/* Macro form used when __OPTIMIZE__ is not defined: expands to the cmpss
   builtin with M as the mask operand and R as the last operand.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)               \
  ((__mmask8) __builtin_ia32_cmpss_mask (                       \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P),        \
     (M), (R)))
8925#endif
8926
8927#ifdef __OPTIMIZE__
8928extern __inline __m512
8929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8930_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
8931{
8932 __m512 v1_old = _mm512_setzero_ps ();
8933 __mmask16 mask = 0xFFFF;
8934
8935 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
8936 __addr,
8937 (__v16si) __index,
8938 mask, __scale);
8939}
8940
8941extern __inline __m512
8942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
8944 __m512i __index, float const *__addr, int __scale)
8945{
8946 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
8947 __addr,
8948 (__v16si) __index,
8949 __mask, __scale);
8950}
8951
8952extern __inline __m512d
8953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
8955{
8956 __m512d v1_old = _mm512_setzero_pd ();
8957 __mmask8 mask = 0xFF;
8958
8959 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
8960 __addr,
8961 (__v8si) __index, mask,
8962 __scale);
8963}
8964
8965extern __inline __m512d
8966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8967_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
8968 __m256i __index, double const *__addr, int __scale)
8969{
8970 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
8971 __addr,
8972 (__v8si) __index,
8973 __mask, __scale);
8974}
8975
8976extern __inline __m256
8977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8978_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
8979{
8980 __m256 v1_old = _mm256_setzero_ps ();
8981 __mmask8 mask = 0xFF;
8982
8983 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
8984 __addr,
8985 (__v8di) __index, mask,
8986 __scale);
8987}
8988
8989extern __inline __m256
8990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
8992 __m512i __index, float const *__addr, int __scale)
8993{
8994 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
8995 __addr,
8996 (__v8di) __index,
8997 __mask, __scale);
8998}
8999
9000extern __inline __m512d
9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9003{
9004 __m512d v1_old = _mm512_setzero_pd ();
9005 __mmask8 mask = 0xFF;
9006
9007 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9008 __addr,
9009 (__v8di) __index, mask,
9010 __scale);
9011}
9012
9013extern __inline __m512d
9014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9015_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9016 __m512i __index, double const *__addr, int __scale)
9017{
9018 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9019 __addr,
9020 (__v8di) __index,
9021 __mask, __scale);
9022}
9023
9024extern __inline __m512i
9025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9027{
9028 __m512i v1_old = _mm512_setzero_si512 ();
9029 __mmask16 mask = 0xFFFF;
9030
9031 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9032 __addr,
9033 (__v16si) __index,
9034 mask, __scale);
9035}
9036
9037extern __inline __m512i
9038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9040 __m512i __index, int const *__addr, int __scale)
9041{
9042 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9043 __addr,
9044 (__v16si) __index,
9045 __mask, __scale);
9046}
9047
9048extern __inline __m512i
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9051{
9052 __m512i v1_old = _mm512_setzero_si512 ();
9053 __mmask8 mask = 0xFF;
9054
9055 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9056 __addr,
9057 (__v8si) __index, mask,
9058 __scale);
9059}
9060
9061extern __inline __m512i
9062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9063_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9064 __m256i __index, long long const *__addr,
9065 int __scale)
9066{
9067 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9068 __addr,
9069 (__v8si) __index,
9070 __mask, __scale);
9071}
9072
9073extern __inline __m256i
9074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9075_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9076{
9077 __m256i v1_old = _mm256_setzero_si256 ();
9078 __mmask8 mask = 0xFF;
9079
9080 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9081 __addr,
9082 (__v8di) __index,
9083 mask, __scale);
9084}
9085
9086extern __inline __m256i
9087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9088_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9089 __m512i __index, int const *__addr, int __scale)
9090{
9091 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9092 __addr,
9093 (__v8di) __index,
9094 __mask, __scale);
9095}
9096
9097extern __inline __m512i
9098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9100{
9101 __m512i v1_old = _mm512_setzero_si512 ();
9102 __mmask8 mask = 0xFF;
9103
9104 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9105 __addr,
9106 (__v8di) __index, mask,
9107 __scale);
9108}
9109
9110extern __inline __m512i
9111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9112_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9113 __m512i __index, long long const *__addr,
9114 int __scale)
9115{
9116 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9117 __addr,
9118 (__v8di) __index,
9119 __mask, __scale);
9120}
9121
9122extern __inline void
9123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9124_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9125{
9126 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9127 (__v16si) __index, (__v16sf) __v1, __scale);
9128}
9129
9130extern __inline void
9131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9132_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9133 __m512i __index, __m512 __v1, int __scale)
9134{
9135 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9136 (__v16sf) __v1, __scale);
9137}
9138
9139extern __inline void
9140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9141_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9142 int __scale)
9143{
9144 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9145 (__v8si) __index, (__v8df) __v1, __scale);
9146}
9147
9148extern __inline void
9149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9151 __m256i __index, __m512d __v1, int __scale)
9152{
9153 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9154 (__v8df) __v1, __scale);
9155}
9156
9157extern __inline void
9158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9159_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9160{
9161 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9162 (__v8di) __index, (__v8sf) __v1, __scale);
9163}
9164
9165extern __inline void
9166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9167_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9168 __m512i __index, __m256 __v1, int __scale)
9169{
9170 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9171 (__v8sf) __v1, __scale);
9172}
9173
9174extern __inline void
9175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9176_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9177 int __scale)
9178{
9179 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9180 (__v8di) __index, (__v8df) __v1, __scale);
9181}
9182
9183extern __inline void
9184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9185_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9186 __m512i __index, __m512d __v1, int __scale)
9187{
9188 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9189 (__v8df) __v1, __scale);
9190}
9191
9192extern __inline void
9193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9194_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9195 __m512i __v1, int __scale)
9196{
9197 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9198 (__v16si) __index, (__v16si) __v1, __scale);
9199}
9200
9201extern __inline void
9202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9203_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9204 __m512i __index, __m512i __v1, int __scale)
9205{
9206 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9207 (__v16si) __v1, __scale);
9208}
9209
9210extern __inline void
9211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9212_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9213 __m512i __v1, int __scale)
9214{
9215 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9216 (__v8si) __index, (__v8di) __v1, __scale);
9217}
9218
9219extern __inline void
9220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9221_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9222 __m256i __index, __m512i __v1, int __scale)
9223{
9224 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9225 (__v8di) __v1, __scale);
9226}
9227
9228extern __inline void
9229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9230_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9231 __m256i __v1, int __scale)
9232{
9233 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9234 (__v8di) __index, (__v8si) __v1, __scale);
9235}
9236
9237extern __inline void
9238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9239_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9240 __m512i __index, __m256i __v1, int __scale)
9241{
9242 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9243 (__v8si) __v1, __scale);
9244}
9245
9246extern __inline void
9247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9249 __m512i __v1, int __scale)
9250{
9251 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9252 (__v8di) __index, (__v8di) __v1, __scale);
9253}
9254
9255extern __inline void
9256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9257_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9258 __m512i __index, __m512i __v1, int __scale)
9259{
9260 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9261 (__v8di) __v1, __scale);
9262}
9263#else
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps (), \
                                          (float const *)(ADDR),        \
                                          (__v16si)(__m512i)(INDEX),    \
                                          (__mmask16)0xFFFF, (int)(SCALE)))
9269
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)(V1OLD),     \
                                          (float const *)(ADDR),        \
                                          (__v16si)(__m512i)(INDEX),    \
                                          (__mmask16)(MASK), (int)(SCALE)))
9275
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd (), \
                                          (double const *)(ADDR),       \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9281
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)(V1OLD),     \
                                          (double const *)(ADDR),       \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9287
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps (), \
                                          (float const *)(ADDR),        \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9293
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)(V1OLD),      \
                                          (float const *)(ADDR),        \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9299
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd (), \
                                          (double const *)(ADDR),       \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9305
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)(V1OLD),     \
                                          (double const *)(ADDR),       \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9311
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
     (__v16si)_mm512_setzero_si512 (),                                  \
     (int const *)(ADDR),                                               \
     (__v16si)(__m512i)(INDEX),                                         \
     (__mmask16)0xFFFF, (int)(SCALE)))
9317
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)(V1OLD),   \
                                           (int const *)(ADDR),         \
                                           (__v16si)(__m512i)(INDEX),   \
                                           (__mmask16)(MASK), (int)(SCALE)))
9323
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
     (__v8di)_mm512_setzero_si512 (),                                   \
     (long long const *)(ADDR),                                         \
     (__v8si)(__m256i)(INDEX),                                          \
     (__mmask8)0xFF, (int)(SCALE)))
9329
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)(V1OLD),     \
                                          (long long const *)(ADDR),    \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9335
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv16si (                             \
     (__v8si)_mm256_setzero_si256 (),                                   \
     (int const *)(ADDR),                                               \
     (__v8di)(__m512i)(INDEX),                                          \
     (__mmask8)0xFF, (int)(SCALE)))
9341
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)(V1OLD),    \
                                           (int const *)(ADDR),         \
                                           (__v8di)(__m512i)(INDEX),    \
                                           (__mmask8)(MASK), (int)(SCALE)))
9347
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole; the expansion itself is parenthesized as well.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
     (__v8di)_mm512_setzero_si512 (),                                   \
     (long long const *)(ADDR),                                         \
     (__v8di)(__m512i)(INDEX),                                          \
     (__mmask8)0xFF, (int)(SCALE)))
9353
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   V1OLD, MASK, INDEX, ADDR or SCALE are converted as a whole; the
   expansion itself is parenthesized as well.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)(V1OLD),     \
                                          (long long const *)(ADDR),    \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9359
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16sf)(__m512)(V1), (int)(SCALE))
9364
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)(MASK),    \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16sf)(__m512)(V1), (int)(SCALE))
9369
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)0xFF,       \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9374
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)(MASK),     \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9379
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8sf)(__m256)(V1), (int)(SCALE))
9384
/* Macro form used when __OPTIMIZE__ is not defined.  MASK is cast to
   __mmask8, the mask type the inline-function form of this intrinsic
   takes (the macro previously said __mmask16).  Every argument is
   parenthesized before it is cast so compound expressions are converted as
   a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)(MASK),     \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8sf)(__m256)(V1), (int)(SCALE))
9389
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)0xFF,       \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9394
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)(MASK),     \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9399
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)0xFFFF,      \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16si)(__m512i)(V1), (int)(SCALE))
9404
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)(MASK),      \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16si)(__m512i)(V1), (int)(SCALE))
9409
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)0xFF,    \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9414
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)(MASK),  \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9419
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)0xFF,         \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8si)(__m256i)(V1), (int)(SCALE))
9424
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)(MASK),       \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8si)(__m256i)(V1), (int)(SCALE))
9429
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so e.g. an ADDR of `p + n' is converted
   as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)0xFF,    \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9434
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument is
   parenthesized before it is cast, so compound expressions passed for
   ADDR, MASK, INDEX, V1 or SCALE are converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)(MASK),  \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9439#endif
9440
9441extern __inline __m512d
9442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9443_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9444{
9445 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9446 (__v8df) __W,
9447 (__mmask8) __U);
9448}
9449
9450extern __inline __m512d
9451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9452_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9453{
9454 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9455 (__v8df)
9456 _mm512_setzero_pd (),
9457 (__mmask8) __U);
9458}
9459
9460extern __inline void
9461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9463{
9464 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9465 (__mmask8) __U);
9466}
9467
9468extern __inline __m512
9469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9470_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9471{
9472 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9473 (__v16sf) __W,
9474 (__mmask16) __U);
9475}
9476
9477extern __inline __m512
9478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9479_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9480{
9481 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9482 (__v16sf)
9483 _mm512_setzero_ps (),
9484 (__mmask16) __U);
9485}
9486
9487extern __inline void
9488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9490{
9491 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9492 (__mmask16) __U);
9493}
9494
9495extern __inline __m512i
9496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9497_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9498{
9499 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9500 (__v8di) __W,
9501 (__mmask8) __U);
9502}
9503
9504extern __inline __m512i
9505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9506_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9507{
9508 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9509 (__v8di)
9510 _mm512_setzero_si512 (),
9511 (__mmask8) __U);
9512}
9513
9514extern __inline void
9515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9516_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9517{
9518 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9519 (__mmask8) __U);
9520}
9521
9522extern __inline __m512i
9523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9525{
9526 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9527 (__v16si) __W,
9528 (__mmask16) __U);
9529}
9530
9531extern __inline __m512i
9532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9533_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9534{
9535 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9536 (__v16si)
9537 _mm512_setzero_si512 (),
9538 (__mmask16) __U);
9539}
9540
9541extern __inline void
9542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9543_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9544{
9545 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9546 (__mmask16) __U);
9547}
9548
9549extern __inline __m512d
9550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9551_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9552{
9553 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9554 (__v8df) __W,
9555 (__mmask8) __U);
9556}
9557
9558extern __inline __m512d
9559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9560_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9561{
9562 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9563 (__v8df)
9564 _mm512_setzero_pd (),
9565 (__mmask8) __U);
9566}
9567
9568extern __inline __m512d
9569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9570_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9571{
9572 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9573 (__v8df) __W,
9574 (__mmask8) __U);
9575}
9576
9577extern __inline __m512d
9578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9579_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9580{
9581 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9582 (__v8df)
9583 _mm512_setzero_pd (),
9584 (__mmask8) __U);
9585}
9586
9587extern __inline __m512
9588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9590{
9591 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9592 (__v16sf) __W,
9593 (__mmask16) __U);
9594}
9595
9596extern __inline __m512
9597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9599{
9600 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9601 (__v16sf)
9602 _mm512_setzero_ps (),
9603 (__mmask16) __U);
9604}
9605
9606extern __inline __m512
9607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9608_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9609{
9610 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9611 (__v16sf) __W,
9612 (__mmask16) __U);
9613}
9614
9615extern __inline __m512
9616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9617_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9618{
9619 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9620 (__v16sf)
9621 _mm512_setzero_ps (),
9622 (__mmask16) __U);
9623}
9624
9625extern __inline __m512i
9626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9627_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9628{
9629 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9630 (__v8di) __W,
9631 (__mmask8) __U);
9632}
9633
9634extern __inline __m512i
9635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9636_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9637{
9638 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9639 (__v8di)
9640 _mm512_setzero_si512 (),
9641 (__mmask8) __U);
9642}
9643
9644extern __inline __m512i
9645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9647{
9648 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9649 (__v8di) __W,
9650 (__mmask8) __U);
9651}
9652
9653extern __inline __m512i
9654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9655_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9656{
9657 return (__m512i)
9658 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9659 (__v8di)
9660 _mm512_setzero_si512 (),
9661 (__mmask8) __U);
9662}
9663
9664extern __inline __m512i
9665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9666_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9667{
9668 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9669 (__v16si) __W,
9670 (__mmask16) __U);
9671}
9672
9673extern __inline __m512i
9674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9675_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9676{
9677 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9678 (__v16si)
9679 _mm512_setzero_si512 (),
9680 (__mmask16) __U);
9681}
9682
9683extern __inline __m512i
9684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9685_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9686{
9687 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9688 (__v16si) __W,
9689 (__mmask16) __U);
9690}
9691
9692extern __inline __m512i
9693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9695{
9696 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9697 (__v16si)
9698 _mm512_setzero_si512
9699 (), (__mmask16) __U);
9700}
9701
9702/* Mask arithmetic operations */
9703extern __inline __mmask16
9704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705_mm512_kand (__mmask16 __A, __mmask16 __B)
9706{
9707 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9708}
9709
9710extern __inline __mmask16
9711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9712_mm512_kandn (__mmask16 __A, __mmask16 __B)
9713{
9714 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9715}
9716
9717extern __inline __mmask16
9718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9719_mm512_kor (__mmask16 __A, __mmask16 __B)
9720{
9721 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
9722}
9723
9724extern __inline int
9725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9726_mm512_kortestz (__mmask16 __A, __mmask16 __B)
9727{
9728 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
9729 (__mmask16) __B);
9730}
9731
9732extern __inline int
9733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734_mm512_kortestc (__mmask16 __A, __mmask16 __B)
9735{
9736 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
9737 (__mmask16) __B);
9738}
9739
9740extern __inline __mmask16
9741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742_mm512_kxnor (__mmask16 __A, __mmask16 __B)
9743{
9744 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
9745}
9746
9747extern __inline __mmask16
9748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749_mm512_kxor (__mmask16 __A, __mmask16 __B)
9750{
9751 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
9752}
9753
9754extern __inline __mmask16
9755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9756_mm512_knot (__mmask16 __A)
9757{
9758 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
9759}
9760
9761extern __inline __mmask16
9762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9763_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
9764{
9765 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
9766}
9767
9768#ifdef __OPTIMIZE__
9769extern __inline __m512i
9770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9771_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
9772 const int __imm)
9773{
9774 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
9775 (__v4si) __D,
9776 __imm,
9777 (__v16si)
9778 _mm512_setzero_si512 (),
9779 __B);
9780}
9781
9782extern __inline __m512
9783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9784_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
9785 const int __imm)
9786{
9787 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
9788 (__v4sf) __D,
9789 __imm,
9790 (__v16sf)
9791 _mm512_setzero_ps (), __B);
9792}
9793
9794extern __inline __m512i
9795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
9797 __m128i __D, const int __imm)
9798{
9799 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
9800 (__v4si) __D,
9801 __imm,
9802 (__v16si) __A,
9803 __B);
9804}
9805
9806extern __inline __m512
9807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9808_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
9809 __m128 __D, const int __imm)
9810{
9811 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
9812 (__v4sf) __D,
9813 __imm,
9814 (__v16sf) __A, __B);
9815}
9816#else
/* Macro forms of the 32x4 insert intrinsics, used when __OPTIMIZE__ is not
   defined.  The mask argument is cast to __mmask16, the mask type the
   inline definitions take for these 16-element operands; casting it to
   __mmask8 would silently discard the upper eight mask bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
9836#endif
9837
9838extern __inline __m512i
9839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9840_mm512_max_epi64 (__m512i __A, __m512i __B)
9841{
9842 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9843 (__v8di) __B,
9844 (__v8di)
9845 _mm512_setzero_si512 (),
9846 (__mmask8) -1);
9847}
9848
9849extern __inline __m512i
9850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
9852{
9853 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9854 (__v8di) __B,
9855 (__v8di)
9856 _mm512_setzero_si512 (),
9857 __M);
9858}
9859
9860extern __inline __m512i
9861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9862_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9863{
9864 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9865 (__v8di) __B,
9866 (__v8di) __W, __M);
9867}
9868
9869extern __inline __m512i
9870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9871_mm512_min_epi64 (__m512i __A, __m512i __B)
9872{
9873 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9874 (__v8di) __B,
9875 (__v8di)
9876 _mm512_setzero_si512 (),
9877 (__mmask8) -1);
9878}
9879
9880extern __inline __m512i
9881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9882_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9883{
9884 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9885 (__v8di) __B,
9886 (__v8di) __W, __M);
9887}
9888
9889extern __inline __m512i
9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
9892{
9893 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9894 (__v8di) __B,
9895 (__v8di)
9896 _mm512_setzero_si512 (),
9897 __M);
9898}
9899
9900extern __inline __m512i
9901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9902_mm512_max_epu64 (__m512i __A, __m512i __B)
9903{
9904 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9905 (__v8di) __B,
9906 (__v8di)
9907 _mm512_setzero_si512 (),
9908 (__mmask8) -1);
9909}
9910
9911extern __inline __m512i
9912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9913_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
9914{
9915 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9916 (__v8di) __B,
9917 (__v8di)
9918 _mm512_setzero_si512 (),
9919 __M);
9920}
9921
9922extern __inline __m512i
9923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9924_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9925{
9926 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9927 (__v8di) __B,
9928 (__v8di) __W, __M);
9929}
9930
9931extern __inline __m512i
9932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9933_mm512_min_epu64 (__m512i __A, __m512i __B)
9934{
9935 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
9936 (__v8di) __B,
9937 (__v8di)
9938 _mm512_setzero_si512 (),
9939 (__mmask8) -1);
9940}
9941
9942extern __inline __m512i
9943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9944_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9945{
9946 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
9947 (__v8di) __B,
9948 (__v8di) __W, __M);
9949}
9950
9951extern __inline __m512i
9952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9953_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
9954{
9955 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
9956 (__v8di) __B,
9957 (__v8di)
9958 _mm512_setzero_si512 (),
9959 __M);
9960}
9961
9962extern __inline __m512i
9963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9964_mm512_max_epi32 (__m512i __A, __m512i __B)
9965{
9966 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
9967 (__v16si) __B,
9968 (__v16si)
9969 _mm512_setzero_si512 (),
9970 (__mmask16) -1);
9971}
9972
9973extern __inline __m512i
9974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
9976{
9977 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
9978 (__v16si) __B,
9979 (__v16si)
9980 _mm512_setzero_si512 (),
9981 __M);
9982}
9983
9984extern __inline __m512i
9985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
9987{
9988 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
9989 (__v16si) __B,
9990 (__v16si) __W, __M);
9991}
9992
9993extern __inline __m512i
9994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9995_mm512_min_epi32 (__m512i __A, __m512i __B)
9996{
9997 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
9998 (__v16si) __B,
9999 (__v16si)
10000 _mm512_setzero_si512 (),
10001 (__mmask16) -1);
10002}
10003
10004extern __inline __m512i
10005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10006_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10007{
10008 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10009 (__v16si) __B,
10010 (__v16si)
10011 _mm512_setzero_si512 (),
10012 __M);
10013}
10014
10015extern __inline __m512i
10016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10017_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10018{
10019 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10020 (__v16si) __B,
10021 (__v16si) __W, __M);
10022}
10023
10024extern __inline __m512i
10025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026_mm512_max_epu32 (__m512i __A, __m512i __B)
10027{
10028 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10029 (__v16si) __B,
10030 (__v16si)
10031 _mm512_setzero_si512 (),
10032 (__mmask16) -1);
10033}
10034
10035extern __inline __m512i
10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10038{
10039 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10040 (__v16si) __B,
10041 (__v16si)
10042 _mm512_setzero_si512 (),
10043 __M);
10044}
10045
10046extern __inline __m512i
10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10049{
10050 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10051 (__v16si) __B,
10052 (__v16si) __W, __M);
10053}
10054
10055extern __inline __m512i
10056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10057_mm512_min_epu32 (__m512i __A, __m512i __B)
10058{
10059 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10060 (__v16si) __B,
10061 (__v16si)
10062 _mm512_setzero_si512 (),
10063 (__mmask16) -1);
10064}
10065
10066extern __inline __m512i
10067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10069{
10070 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10071 (__v16si) __B,
10072 (__v16si)
10073 _mm512_setzero_si512 (),
10074 __M);
10075}
10076
10077extern __inline __m512i
10078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10079_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10080{
10081 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10082 (__v16si) __B,
10083 (__v16si) __W, __M);
10084}
10085
10086extern __inline __m512
10087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10088_mm512_unpacklo_ps (__m512 __A, __m512 __B)
10089{
10090 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10091 (__v16sf) __B,
10092 (__v16sf)
10093 _mm512_setzero_ps (),
10094 (__mmask16) -1);
10095}
10096
10097extern __inline __m512
10098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10099_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10100{
10101 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10102 (__v16sf) __B,
10103 (__v16sf) __W,
10104 (__mmask16) __U);
10105}
10106
10107extern __inline __m512
10108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10110{
10111 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10112 (__v16sf) __B,
10113 (__v16sf)
10114 _mm512_setzero_ps (),
10115 (__mmask16) __U);
10116}
10117
075691af
AI
10118#ifdef __OPTIMIZE__
10119extern __inline __m128d
10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10122{
10123 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10124 (__v2df) __B,
10125 __R);
10126}
10127
10128extern __inline __m128
10129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10131{
10132 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10133 (__v4sf) __B,
10134 __R);
10135}
10136
10137extern __inline __m128d
10138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10139_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10140{
10141 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10142 (__v2df) __B,
10143 __R);
10144}
10145
10146extern __inline __m128
10147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10148_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10149{
10150 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10151 (__v4sf) __B,
10152 __R);
10153}
10154
10155#else
/* Macro forms of the scalar max/min rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each macro calls the same max/min builtin
   as the inline definition of the same name (not the add/sub builtins), and
   the whole expansion is parenthesized so that it behaves as a single
   expression.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round (A, B, C))
10167#endif
10168
756c5857
AI
10169extern __inline __m512d
10170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10172{
10173 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10174 (__v8df) __W,
10175 (__mmask8) __U);
10176}
10177
10178extern __inline __m512
10179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10180_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10181{
10182 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10183 (__v16sf) __W,
10184 (__mmask16) __U);
10185}
10186
10187extern __inline __m512i
10188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10189_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10190{
10191 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10192 (__v8di) __W,
10193 (__mmask8) __U);
10194}
10195
10196extern __inline __m512i
10197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10198_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10199{
10200 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10201 (__v16si) __W,
10202 (__mmask16) __U);
10203}
10204
075691af
AI
10205#ifdef __OPTIMIZE__
10206extern __inline __m128d
10207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10208_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10209{
10210 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10211 (__v2df) __A,
10212 (__v2df) __B,
10213 __R);
10214}
10215
10216extern __inline __m128
10217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10218_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10219{
10220 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10221 (__v4sf) __A,
10222 (__v4sf) __B,
10223 __R);
10224}
10225
10226extern __inline __m128d
10227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10228_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10229{
10230 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10231 (__v2df) __A,
10232 -(__v2df) __B,
10233 __R);
10234}
10235
10236extern __inline __m128
10237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10238_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10239{
10240 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10241 (__v4sf) __A,
10242 -(__v4sf) __B,
10243 __R);
10244}
10245
10246extern __inline __m128d
10247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10248_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10249{
10250 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10251 -(__v2df) __A,
10252 (__v2df) __B,
10253 __R);
10254}
10255
10256extern __inline __m128
10257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10258_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10259{
10260 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10261 -(__v4sf) __A,
10262 (__v4sf) __B,
10263 __R);
10264}
10265
10266extern __inline __m128d
10267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10268_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10269{
10270 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10271 -(__v2df) __A,
10272 -(__v2df) __B,
10273 __R);
10274}
10275
10276extern __inline __m128
10277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10279{
10280 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10281 -(__v4sf) __A,
10282 -(__v4sf) __B,
10283 __R);
10284}
10285#else
/* Macro forms of the scalar FMA rounding intrinsics, used when __OPTIMIZE__
   is not defined.  All of them go through the vfmadd{sd,ss}3_round builtins,
   negating the second and/or third operand for the fmsub/fnmadd/fnmsub
   variants.  Each expansion is wrapped in parentheses so that a postfix
   operator applied to the macro call binds to the cast result, as it would
   with the inline-function definitions.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10309#endif
10310
756c5857
AI
10311#ifdef __OPTIMIZE__
10312extern __inline int
10313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10314_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10315{
10316 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10317}
10318
10319extern __inline int
10320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10321_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10322{
10323 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10324}
10325#else
/* Macro forms used when __OPTIMIZE__ is not defined: the four arguments are
   handed to the vcomiss / vcomisd builtins unchanged.  */
#define _mm_comi_round_ss(X, Y, P, R) \
  __builtin_ia32_vcomiss (X, Y, P, R)
#define _mm_comi_round_sd(X, Y, P, R) \
  __builtin_ia32_vcomisd (X, Y, P, R)
10330#endif
10331
10332extern __inline __m512d
10333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10334_mm512_sqrt_pd (__m512d __A)
10335{
10336 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10337 (__v8df)
10338 _mm512_setzero_pd (),
10339 (__mmask8) -1,
10340 _MM_FROUND_CUR_DIRECTION);
10341}
10342
10343extern __inline __m512d
10344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10345_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10346{
10347 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10348 (__v8df) __W,
10349 (__mmask8) __U,
10350 _MM_FROUND_CUR_DIRECTION);
10351}
10352
10353extern __inline __m512d
10354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10355_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10356{
10357 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10358 (__v8df)
10359 _mm512_setzero_pd (),
10360 (__mmask8) __U,
10361 _MM_FROUND_CUR_DIRECTION);
10362}
10363
10364extern __inline __m512
10365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10366_mm512_sqrt_ps (__m512 __A)
10367{
10368 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10369 (__v16sf)
10370 _mm512_setzero_ps (),
10371 (__mmask16) -1,
10372 _MM_FROUND_CUR_DIRECTION);
10373}
10374
10375extern __inline __m512
10376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10377_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10378{
10379 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10380 (__v16sf) __W,
10381 (__mmask16) __U,
10382 _MM_FROUND_CUR_DIRECTION);
10383}
10384
10385extern __inline __m512
10386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10387_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10388{
10389 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10390 (__v16sf)
10391 _mm512_setzero_ps (),
10392 (__mmask16) __U,
10393 _MM_FROUND_CUR_DIRECTION);
10394}
10395
10396extern __inline __m512d
10397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398_mm512_add_pd (__m512d __A, __m512d __B)
10399{
10400 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10401 (__v8df) __B,
10402 (__v8df)
10403 _mm512_setzero_pd (),
10404 (__mmask8) -1,
10405 _MM_FROUND_CUR_DIRECTION);
10406}
10407
10408extern __inline __m512d
10409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10411{
10412 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10413 (__v8df) __B,
10414 (__v8df) __W,
10415 (__mmask8) __U,
10416 _MM_FROUND_CUR_DIRECTION);
10417}
10418
10419extern __inline __m512d
10420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10422{
10423 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10424 (__v8df) __B,
10425 (__v8df)
10426 _mm512_setzero_pd (),
10427 (__mmask8) __U,
10428 _MM_FROUND_CUR_DIRECTION);
10429}
10430
10431extern __inline __m512
10432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10433_mm512_add_ps (__m512 __A, __m512 __B)
10434{
10435 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10436 (__v16sf) __B,
10437 (__v16sf)
10438 _mm512_setzero_ps (),
10439 (__mmask16) -1,
10440 _MM_FROUND_CUR_DIRECTION);
10441}
10442
10443extern __inline __m512
10444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10445_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10446{
10447 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10448 (__v16sf) __B,
10449 (__v16sf) __W,
10450 (__mmask16) __U,
10451 _MM_FROUND_CUR_DIRECTION);
10452}
10453
10454extern __inline __m512
10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10456_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10457{
10458 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10459 (__v16sf) __B,
10460 (__v16sf)
10461 _mm512_setzero_ps (),
10462 (__mmask16) __U,
10463 _MM_FROUND_CUR_DIRECTION);
10464}
10465
10466extern __inline __m512d
10467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10468_mm512_sub_pd (__m512d __A, __m512d __B)
10469{
10470 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10471 (__v8df) __B,
10472 (__v8df)
10473 _mm512_setzero_pd (),
10474 (__mmask8) -1,
10475 _MM_FROUND_CUR_DIRECTION);
10476}
10477
10478extern __inline __m512d
10479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10481{
10482 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10483 (__v8df) __B,
10484 (__v8df) __W,
10485 (__mmask8) __U,
10486 _MM_FROUND_CUR_DIRECTION);
10487}
10488
10489extern __inline __m512d
10490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10492{
10493 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10494 (__v8df) __B,
10495 (__v8df)
10496 _mm512_setzero_pd (),
10497 (__mmask8) __U,
10498 _MM_FROUND_CUR_DIRECTION);
10499}
10500
10501extern __inline __m512
10502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10503_mm512_sub_ps (__m512 __A, __m512 __B)
10504{
10505 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10506 (__v16sf) __B,
10507 (__v16sf)
10508 _mm512_setzero_ps (),
10509 (__mmask16) -1,
10510 _MM_FROUND_CUR_DIRECTION);
10511}
10512
10513extern __inline __m512
10514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10515_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10516{
10517 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10518 (__v16sf) __B,
10519 (__v16sf) __W,
10520 (__mmask16) __U,
10521 _MM_FROUND_CUR_DIRECTION);
10522}
10523
10524extern __inline __m512
10525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10526_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10527{
10528 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10529 (__v16sf) __B,
10530 (__v16sf)
10531 _mm512_setzero_ps (),
10532 (__mmask16) __U,
10533 _MM_FROUND_CUR_DIRECTION);
10534}
10535
10536extern __inline __m512d
10537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10538_mm512_mul_pd (__m512d __A, __m512d __B)
10539{
10540 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10541 (__v8df) __B,
10542 (__v8df)
10543 _mm512_setzero_pd (),
10544 (__mmask8) -1,
10545 _MM_FROUND_CUR_DIRECTION);
10546}
10547
10548extern __inline __m512d
10549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10551{
10552 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10553 (__v8df) __B,
10554 (__v8df) __W,
10555 (__mmask8) __U,
10556 _MM_FROUND_CUR_DIRECTION);
10557}
10558
10559extern __inline __m512d
10560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10561_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10562{
10563 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10564 (__v8df) __B,
10565 (__v8df)
10566 _mm512_setzero_pd (),
10567 (__mmask8) __U,
10568 _MM_FROUND_CUR_DIRECTION);
10569}
10570
10571extern __inline __m512
10572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10573_mm512_mul_ps (__m512 __A, __m512 __B)
10574{
10575 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10576 (__v16sf) __B,
10577 (__v16sf)
10578 _mm512_setzero_ps (),
10579 (__mmask16) -1,
10580 _MM_FROUND_CUR_DIRECTION);
10581}
10582
10583extern __inline __m512
10584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10586{
10587 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10588 (__v16sf) __B,
10589 (__v16sf) __W,
10590 (__mmask16) __U,
10591 _MM_FROUND_CUR_DIRECTION);
10592}
10593
10594extern __inline __m512
10595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10597{
10598 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10599 (__v16sf) __B,
10600 (__v16sf)
10601 _mm512_setzero_ps (),
10602 (__mmask16) __U,
10603 _MM_FROUND_CUR_DIRECTION);
10604}
10605
10606extern __inline __m512d
10607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10608_mm512_div_pd (__m512d __M, __m512d __V)
10609{
10610 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10611 (__v8df) __V,
10612 (__v8df)
10613 _mm512_setzero_pd (),
10614 (__mmask8) -1,
10615 _MM_FROUND_CUR_DIRECTION);
10616}
10617
10618extern __inline __m512d
10619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10621{
10622 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10623 (__v8df) __V,
10624 (__v8df) __W,
10625 (__mmask8) __U,
10626 _MM_FROUND_CUR_DIRECTION);
10627}
10628
10629extern __inline __m512d
10630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10632{
10633 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10634 (__v8df) __V,
10635 (__v8df)
10636 _mm512_setzero_pd (),
10637 (__mmask8) __U,
10638 _MM_FROUND_CUR_DIRECTION);
10639}
10640
10641extern __inline __m512
10642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10643_mm512_div_ps (__m512 __A, __m512 __B)
10644{
10645 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10646 (__v16sf) __B,
10647 (__v16sf)
10648 _mm512_setzero_ps (),
10649 (__mmask16) -1,
10650 _MM_FROUND_CUR_DIRECTION);
10651}
10652
10653extern __inline __m512
10654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10655_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10656{
10657 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10658 (__v16sf) __B,
10659 (__v16sf) __W,
10660 (__mmask16) __U,
10661 _MM_FROUND_CUR_DIRECTION);
10662}
10663
10664extern __inline __m512
10665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10666_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10667{
10668 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10669 (__v16sf) __B,
10670 (__v16sf)
10671 _mm512_setzero_ps (),
10672 (__mmask16) __U,
10673 _MM_FROUND_CUR_DIRECTION);
10674}
10675
10676extern __inline __m512d
10677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678_mm512_max_pd (__m512d __A, __m512d __B)
10679{
10680 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10681 (__v8df) __B,
10682 (__v8df)
10683 _mm512_setzero_pd (),
10684 (__mmask8) -1,
10685 _MM_FROUND_CUR_DIRECTION);
10686}
10687
10688extern __inline __m512d
10689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10690_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10691{
10692 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10693 (__v8df) __B,
10694 (__v8df) __W,
10695 (__mmask8) __U,
10696 _MM_FROUND_CUR_DIRECTION);
10697}
10698
10699extern __inline __m512d
10700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10702{
10703 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10704 (__v8df) __B,
10705 (__v8df)
10706 _mm512_setzero_pd (),
10707 (__mmask8) __U,
10708 _MM_FROUND_CUR_DIRECTION);
10709}
10710
10711extern __inline __m512
10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713_mm512_max_ps (__m512 __A, __m512 __B)
10714{
10715 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10716 (__v16sf) __B,
10717 (__v16sf)
10718 _mm512_setzero_ps (),
10719 (__mmask16) -1,
10720 _MM_FROUND_CUR_DIRECTION);
10721}
10722
10723extern __inline __m512
10724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10725_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10726{
10727 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10728 (__v16sf) __B,
10729 (__v16sf) __W,
10730 (__mmask16) __U,
10731 _MM_FROUND_CUR_DIRECTION);
10732}
10733
10734extern __inline __m512
10735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10737{
10738 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10739 (__v16sf) __B,
10740 (__v16sf)
10741 _mm512_setzero_ps (),
10742 (__mmask16) __U,
10743 _MM_FROUND_CUR_DIRECTION);
10744}
10745
10746extern __inline __m512d
10747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10748_mm512_min_pd (__m512d __A, __m512d __B)
10749{
10750 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10751 (__v8df) __B,
10752 (__v8df)
10753 _mm512_setzero_pd (),
10754 (__mmask8) -1,
10755 _MM_FROUND_CUR_DIRECTION);
10756}
10757
10758extern __inline __m512d
10759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10761{
10762 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10763 (__v8df) __B,
10764 (__v8df) __W,
10765 (__mmask8) __U,
10766 _MM_FROUND_CUR_DIRECTION);
10767}
10768
10769extern __inline __m512d
10770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10771_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
10772{
10773 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10774 (__v8df) __B,
10775 (__v8df)
10776 _mm512_setzero_pd (),
10777 (__mmask8) __U,
10778 _MM_FROUND_CUR_DIRECTION);
10779}
10780
10781extern __inline __m512
10782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783_mm512_min_ps (__m512 __A, __m512 __B)
10784{
10785 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10786 (__v16sf) __B,
10787 (__v16sf)
10788 _mm512_setzero_ps (),
10789 (__mmask16) -1,
10790 _MM_FROUND_CUR_DIRECTION);
10791}
10792
10793extern __inline __m512
10794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10796{
10797 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10798 (__v16sf) __B,
10799 (__v16sf) __W,
10800 (__mmask16) __U,
10801 _MM_FROUND_CUR_DIRECTION);
10802}
10803
10804extern __inline __m512
10805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
10807{
10808 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10809 (__v16sf) __B,
10810 (__v16sf)
10811 _mm512_setzero_ps (),
10812 (__mmask16) __U,
10813 _MM_FROUND_CUR_DIRECTION);
10814}
10815
10816extern __inline __m512d
10817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818_mm512_scalef_pd (__m512d __A, __m512d __B)
10819{
10820 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10821 (__v8df) __B,
10822 (__v8df)
10823 _mm512_setzero_pd (),
10824 (__mmask8) -1,
10825 _MM_FROUND_CUR_DIRECTION);
10826}
10827
10828extern __inline __m512d
10829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10830_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10831{
10832 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10833 (__v8df) __B,
10834 (__v8df) __W,
10835 (__mmask8) __U,
10836 _MM_FROUND_CUR_DIRECTION);
10837}
10838
10839extern __inline __m512d
10840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10841_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
10842{
10843 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10844 (__v8df) __B,
10845 (__v8df)
10846 _mm512_setzero_pd (),
10847 (__mmask8) __U,
10848 _MM_FROUND_CUR_DIRECTION);
10849}
10850
10851extern __inline __m512
10852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853_mm512_scalef_ps (__m512 __A, __m512 __B)
10854{
10855 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10856 (__v16sf) __B,
10857 (__v16sf)
10858 _mm512_setzero_ps (),
10859 (__mmask16) -1,
10860 _MM_FROUND_CUR_DIRECTION);
10861}
10862
10863extern __inline __m512
10864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10865_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10866{
10867 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10868 (__v16sf) __B,
10869 (__v16sf) __W,
10870 (__mmask16) __U,
10871 _MM_FROUND_CUR_DIRECTION);
10872}
10873
10874extern __inline __m512
10875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10876_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
10877{
10878 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10879 (__v16sf) __B,
10880 (__v16sf)
10881 _mm512_setzero_ps (),
10882 (__mmask16) __U,
10883 _MM_FROUND_CUR_DIRECTION);
10884}
10885
075691af
AI
10886extern __inline __m128d
10887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888_mm_scalef_sd (__m128d __A, __m128d __B)
10889{
10890 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
10891 (__v2df) __B,
10892 _MM_FROUND_CUR_DIRECTION);
10893}
10894
10895extern __inline __m128
10896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10897_mm_scalef_ss (__m128 __A, __m128 __B)
10898{
10899 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
10900 (__v4sf) __B,
10901 _MM_FROUND_CUR_DIRECTION);
10902}
10903
756c5857
AI
10904extern __inline __m512d
10905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10906_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
10907{
10908 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
10909 (__v8df) __B,
10910 (__v8df) __C,
10911 (__mmask8) -1,
10912 _MM_FROUND_CUR_DIRECTION);
10913}
10914
10915extern __inline __m512d
10916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10917_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
10918{
10919 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
10920 (__v8df) __B,
10921 (__v8df) __C,
10922 (__mmask8) __U,
10923 _MM_FROUND_CUR_DIRECTION);
10924}
10925
10926extern __inline __m512d
10927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10928_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
10929{
10930 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
10931 (__v8df) __B,
10932 (__v8df) __C,
10933 (__mmask8) __U,
10934 _MM_FROUND_CUR_DIRECTION);
10935}
10936
10937extern __inline __m512d
10938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10939_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
10940{
10941 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
10942 (__v8df) __B,
10943 (__v8df) __C,
10944 (__mmask8) __U,
10945 _MM_FROUND_CUR_DIRECTION);
10946}
10947
10948extern __inline __m512
10949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10950_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
10951{
10952 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
10953 (__v16sf) __B,
10954 (__v16sf) __C,
10955 (__mmask16) -1,
10956 _MM_FROUND_CUR_DIRECTION);
10957}
10958
10959extern __inline __m512
10960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10961_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
10962{
10963 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
10964 (__v16sf) __B,
10965 (__v16sf) __C,
10966 (__mmask16) __U,
10967 _MM_FROUND_CUR_DIRECTION);
10968}
10969
10970extern __inline __m512
10971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
10973{
10974 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
10975 (__v16sf) __B,
10976 (__v16sf) __C,
10977 (__mmask16) __U,
10978 _MM_FROUND_CUR_DIRECTION);
10979}
10980
10981extern __inline __m512
10982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
10984{
10985 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
10986 (__v16sf) __B,
10987 (__v16sf) __C,
10988 (__mmask16) __U,
10989 _MM_FROUND_CUR_DIRECTION);
10990}
10991
10992extern __inline __m512d
10993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10994_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
10995{
10996 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
10997 (__v8df) __B,
10998 -(__v8df) __C,
10999 (__mmask8) -1,
11000 _MM_FROUND_CUR_DIRECTION);
11001}
11002
11003extern __inline __m512d
11004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11005_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11006{
11007 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11008 (__v8df) __B,
11009 -(__v8df) __C,
11010 (__mmask8) __U,
11011 _MM_FROUND_CUR_DIRECTION);
11012}
11013
11014extern __inline __m512d
11015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11016_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11017{
11018 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11019 (__v8df) __B,
11020 (__v8df) __C,
11021 (__mmask8) __U,
11022 _MM_FROUND_CUR_DIRECTION);
11023}
11024
11025extern __inline __m512d
11026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11027_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11028{
11029 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11030 (__v8df) __B,
11031 -(__v8df) __C,
11032 (__mmask8) __U,
11033 _MM_FROUND_CUR_DIRECTION);
11034}
11035
11036extern __inline __m512
11037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11038_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11039{
11040 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11041 (__v16sf) __B,
11042 -(__v16sf) __C,
11043 (__mmask16) -1,
11044 _MM_FROUND_CUR_DIRECTION);
11045}
11046
11047extern __inline __m512
11048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11049_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11050{
11051 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11052 (__v16sf) __B,
11053 -(__v16sf) __C,
11054 (__mmask16) __U,
11055 _MM_FROUND_CUR_DIRECTION);
11056}
11057
11058extern __inline __m512
11059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11060_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11061{
11062 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11063 (__v16sf) __B,
11064 (__v16sf) __C,
11065 (__mmask16) __U,
11066 _MM_FROUND_CUR_DIRECTION);
11067}
11068
11069extern __inline __m512
11070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11071_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11072{
11073 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11074 (__v16sf) __B,
11075 -(__v16sf) __C,
11076 (__mmask16) __U,
11077 _MM_FROUND_CUR_DIRECTION);
11078}
11079
11080extern __inline __m512d
11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11083{
11084 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11085 (__v8df) __B,
11086 (__v8df) __C,
11087 (__mmask8) -1,
11088 _MM_FROUND_CUR_DIRECTION);
11089}
11090
11091extern __inline __m512d
11092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11093_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11094{
11095 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11096 (__v8df) __B,
11097 (__v8df) __C,
11098 (__mmask8) __U,
11099 _MM_FROUND_CUR_DIRECTION);
11100}
11101
11102extern __inline __m512d
11103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11104_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11105{
11106 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11107 (__v8df) __B,
11108 (__v8df) __C,
11109 (__mmask8) __U,
11110 _MM_FROUND_CUR_DIRECTION);
11111}
11112
11113extern __inline __m512d
11114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11115_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11116{
11117 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11118 (__v8df) __B,
11119 (__v8df) __C,
11120 (__mmask8) __U,
11121 _MM_FROUND_CUR_DIRECTION);
11122}
11123
11124extern __inline __m512
11125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11126_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11127{
11128 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11129 (__v16sf) __B,
11130 (__v16sf) __C,
11131 (__mmask16) -1,
11132 _MM_FROUND_CUR_DIRECTION);
11133}
11134
11135extern __inline __m512
11136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11137_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11138{
11139 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11140 (__v16sf) __B,
11141 (__v16sf) __C,
11142 (__mmask16) __U,
11143 _MM_FROUND_CUR_DIRECTION);
11144}
11145
11146extern __inline __m512
11147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11149{
11150 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11151 (__v16sf) __B,
11152 (__v16sf) __C,
11153 (__mmask16) __U,
11154 _MM_FROUND_CUR_DIRECTION);
11155}
11156
11157extern __inline __m512
11158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11160{
11161 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11162 (__v16sf) __B,
11163 (__v16sf) __C,
11164 (__mmask16) __U,
11165 _MM_FROUND_CUR_DIRECTION);
11166}
11167
11168extern __inline __m512d
11169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11171{
11172 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11173 (__v8df) __B,
11174 -(__v8df) __C,
11175 (__mmask8) -1,
11176 _MM_FROUND_CUR_DIRECTION);
11177}
11178
11179extern __inline __m512d
11180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11182{
11183 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11184 (__v8df) __B,
11185 -(__v8df) __C,
11186 (__mmask8) __U,
11187 _MM_FROUND_CUR_DIRECTION);
11188}
11189
11190extern __inline __m512d
11191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11192_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11193{
11194 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11195 (__v8df) __B,
11196 (__v8df) __C,
11197 (__mmask8) __U,
11198 _MM_FROUND_CUR_DIRECTION);
11199}
11200
11201extern __inline __m512d
11202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11203_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11204{
11205 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11206 (__v8df) __B,
11207 -(__v8df) __C,
11208 (__mmask8) __U,
11209 _MM_FROUND_CUR_DIRECTION);
11210}
11211
11212extern __inline __m512
11213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11214_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11215{
11216 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11217 (__v16sf) __B,
11218 -(__v16sf) __C,
11219 (__mmask16) -1,
11220 _MM_FROUND_CUR_DIRECTION);
11221}
11222
11223extern __inline __m512
11224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11225_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11226{
11227 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11228 (__v16sf) __B,
11229 -(__v16sf) __C,
11230 (__mmask16) __U,
11231 _MM_FROUND_CUR_DIRECTION);
11232}
11233
11234extern __inline __m512
11235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11236_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11237{
11238 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11239 (__v16sf) __B,
11240 (__v16sf) __C,
11241 (__mmask16) __U,
11242 _MM_FROUND_CUR_DIRECTION);
11243}
11244
11245extern __inline __m512
11246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11247_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11248{
11249 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11250 (__v16sf) __B,
11251 -(__v16sf) __C,
11252 (__mmask16) __U,
11253 _MM_FROUND_CUR_DIRECTION);
11254}
11255
11256extern __inline __m512d
11257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11259{
11260 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11261 (__v8df) __B,
11262 (__v8df) __C,
11263 (__mmask8) -1,
11264 _MM_FROUND_CUR_DIRECTION);
11265}
11266
11267extern __inline __m512d
11268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11270{
11271 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11272 (__v8df) __B,
11273 (__v8df) __C,
11274 (__mmask8) __U,
11275 _MM_FROUND_CUR_DIRECTION);
11276}
11277
11278extern __inline __m512d
11279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11281{
11282 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11283 (__v8df) __B,
11284 (__v8df) __C,
11285 (__mmask8) __U,
11286 _MM_FROUND_CUR_DIRECTION);
11287}
11288
11289extern __inline __m512d
11290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11292{
11293 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11294 (__v8df) __B,
11295 (__v8df) __C,
11296 (__mmask8) __U,
11297 _MM_FROUND_CUR_DIRECTION);
11298}
11299
11300extern __inline __m512
11301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11302_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11303{
11304 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11305 (__v16sf) __B,
11306 (__v16sf) __C,
11307 (__mmask16) -1,
11308 _MM_FROUND_CUR_DIRECTION);
11309}
11310
11311extern __inline __m512
11312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11313_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11314{
11315 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11316 (__v16sf) __B,
11317 (__v16sf) __C,
11318 (__mmask16) __U,
11319 _MM_FROUND_CUR_DIRECTION);
11320}
11321
11322extern __inline __m512
11323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11325{
11326 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11327 (__v16sf) __B,
11328 (__v16sf) __C,
11329 (__mmask16) __U,
11330 _MM_FROUND_CUR_DIRECTION);
11331}
11332
11333extern __inline __m512
11334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11335_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11336{
11337 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11338 (__v16sf) __B,
11339 (__v16sf) __C,
11340 (__mmask16) __U,
11341 _MM_FROUND_CUR_DIRECTION);
11342}
11343
11344extern __inline __m512d
11345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11347{
11348 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11349 (__v8df) __B,
11350 -(__v8df) __C,
11351 (__mmask8) -1,
11352 _MM_FROUND_CUR_DIRECTION);
11353}
11354
11355extern __inline __m512d
11356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11358{
11359 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11360 (__v8df) __B,
11361 (__v8df) __C,
11362 (__mmask8) __U,
11363 _MM_FROUND_CUR_DIRECTION);
11364}
11365
11366extern __inline __m512d
11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11369{
11370 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11371 (__v8df) __B,
11372 (__v8df) __C,
11373 (__mmask8) __U,
11374 _MM_FROUND_CUR_DIRECTION);
11375}
11376
11377extern __inline __m512d
11378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11380{
11381 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11382 (__v8df) __B,
11383 -(__v8df) __C,
11384 (__mmask8) __U,
11385 _MM_FROUND_CUR_DIRECTION);
11386}
11387
11388extern __inline __m512
11389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11391{
11392 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11393 (__v16sf) __B,
11394 -(__v16sf) __C,
11395 (__mmask16) -1,
11396 _MM_FROUND_CUR_DIRECTION);
11397}
11398
11399extern __inline __m512
11400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11402{
11403 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11404 (__v16sf) __B,
11405 (__v16sf) __C,
11406 (__mmask16) __U,
11407 _MM_FROUND_CUR_DIRECTION);
11408}
11409
11410extern __inline __m512
11411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11412_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11413{
11414 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11415 (__v16sf) __B,
11416 (__v16sf) __C,
11417 (__mmask16) __U,
11418 _MM_FROUND_CUR_DIRECTION);
11419}
11420
11421extern __inline __m512
11422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11423_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11424{
11425 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11426 (__v16sf) __B,
11427 -(__v16sf) __C,
11428 (__mmask16) __U,
11429 _MM_FROUND_CUR_DIRECTION);
11430}
11431
11432extern __inline __m256i
11433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11434_mm512_cvttpd_epi32 (__m512d __A)
11435{
11436 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11437 (__v8si)
11438 _mm256_setzero_si256 (),
11439 (__mmask8) -1,
11440 _MM_FROUND_CUR_DIRECTION);
11441}
11442
11443extern __inline __m256i
11444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11445_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11446{
11447 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11448 (__v8si) __W,
11449 (__mmask8) __U,
11450 _MM_FROUND_CUR_DIRECTION);
11451}
11452
11453extern __inline __m256i
11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11456{
11457 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11458 (__v8si)
11459 _mm256_setzero_si256 (),
11460 (__mmask8) __U,
11461 _MM_FROUND_CUR_DIRECTION);
11462}
11463
11464extern __inline __m256i
11465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466_mm512_cvttpd_epu32 (__m512d __A)
11467{
11468 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11469 (__v8si)
11470 _mm256_setzero_si256 (),
11471 (__mmask8) -1,
11472 _MM_FROUND_CUR_DIRECTION);
11473}
11474
11475extern __inline __m256i
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11478{
11479 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11480 (__v8si) __W,
11481 (__mmask8) __U,
11482 _MM_FROUND_CUR_DIRECTION);
11483}
11484
11485extern __inline __m256i
11486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11487_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11488{
11489 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11490 (__v8si)
11491 _mm256_setzero_si256 (),
11492 (__mmask8) __U,
11493 _MM_FROUND_CUR_DIRECTION);
11494}
11495
11496extern __inline __m256i
11497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11498_mm512_cvtpd_epi32 (__m512d __A)
11499{
11500 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11501 (__v8si)
11502 _mm256_setzero_si256 (),
11503 (__mmask8) -1,
11504 _MM_FROUND_CUR_DIRECTION);
11505}
11506
11507extern __inline __m256i
11508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11509_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11510{
11511 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11512 (__v8si) __W,
11513 (__mmask8) __U,
11514 _MM_FROUND_CUR_DIRECTION);
11515}
11516
11517extern __inline __m256i
11518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11519_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11520{
11521 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11522 (__v8si)
11523 _mm256_setzero_si256 (),
11524 (__mmask8) __U,
11525 _MM_FROUND_CUR_DIRECTION);
11526}
11527
11528extern __inline __m256i
11529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11530_mm512_cvtpd_epu32 (__m512d __A)
11531{
11532 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11533 (__v8si)
11534 _mm256_setzero_si256 (),
11535 (__mmask8) -1,
11536 _MM_FROUND_CUR_DIRECTION);
11537}
11538
11539extern __inline __m256i
11540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11541_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11542{
11543 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11544 (__v8si) __W,
11545 (__mmask8) __U,
11546 _MM_FROUND_CUR_DIRECTION);
11547}
11548
11549extern __inline __m256i
11550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11551_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11552{
11553 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11554 (__v8si)
11555 _mm256_setzero_si256 (),
11556 (__mmask8) __U,
11557 _MM_FROUND_CUR_DIRECTION);
11558}
11559
11560extern __inline __m512i
11561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562_mm512_cvttps_epi32 (__m512 __A)
11563{
11564 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11565 (__v16si)
11566 _mm512_setzero_si512 (),
11567 (__mmask16) -1,
11568 _MM_FROUND_CUR_DIRECTION);
11569}
11570
11571extern __inline __m512i
11572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11573_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11574{
11575 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11576 (__v16si) __W,
11577 (__mmask16) __U,
11578 _MM_FROUND_CUR_DIRECTION);
11579}
11580
11581extern __inline __m512i
11582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11583_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11584{
11585 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11586 (__v16si)
11587 _mm512_setzero_si512 (),
11588 (__mmask16) __U,
11589 _MM_FROUND_CUR_DIRECTION);
11590}
11591
11592extern __inline __m512i
11593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594_mm512_cvttps_epu32 (__m512 __A)
11595{
11596 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11597 (__v16si)
11598 _mm512_setzero_si512 (),
11599 (__mmask16) -1,
11600 _MM_FROUND_CUR_DIRECTION);
11601}
11602
11603extern __inline __m512i
11604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11605_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11606{
11607 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11608 (__v16si) __W,
11609 (__mmask16) __U,
11610 _MM_FROUND_CUR_DIRECTION);
11611}
11612
11613extern __inline __m512i
11614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11615_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11616{
11617 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11618 (__v16si)
11619 _mm512_setzero_si512 (),
11620 (__mmask16) __U,
11621 _MM_FROUND_CUR_DIRECTION);
11622}
11623
11624extern __inline __m512i
11625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11626_mm512_cvtps_epi32 (__m512 __A)
11627{
11628 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11629 (__v16si)
11630 _mm512_setzero_si512 (),
11631 (__mmask16) -1,
11632 _MM_FROUND_CUR_DIRECTION);
11633}
11634
11635extern __inline __m512i
11636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11637_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11638{
11639 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11640 (__v16si) __W,
11641 (__mmask16) __U,
11642 _MM_FROUND_CUR_DIRECTION);
11643}
11644
11645extern __inline __m512i
11646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11647_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11648{
11649 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11650 (__v16si)
11651 _mm512_setzero_si512 (),
11652 (__mmask16) __U,
11653 _MM_FROUND_CUR_DIRECTION);
11654}
11655
11656extern __inline __m512i
11657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658_mm512_cvtps_epu32 (__m512 __A)
11659{
11660 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11661 (__v16si)
11662 _mm512_setzero_si512 (),
11663 (__mmask16) -1,
11664 _MM_FROUND_CUR_DIRECTION);
11665}
11666
11667extern __inline __m512i
11668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11669_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11670{
11671 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11672 (__v16si) __W,
11673 (__mmask16) __U,
11674 _MM_FROUND_CUR_DIRECTION);
11675}
11676
11677extern __inline __m512i
11678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11679_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11680{
11681 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11682 (__v16si)
11683 _mm512_setzero_si512 (),
11684 (__mmask16) __U,
11685 _MM_FROUND_CUR_DIRECTION);
11686}
11687
11688#ifdef __x86_64__
11689extern __inline __m128
11690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11691_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11692{
11693 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11694 _MM_FROUND_CUR_DIRECTION);
11695}
11696
11697extern __inline __m128d
11698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11700{
11701 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11702 _MM_FROUND_CUR_DIRECTION);
11703}
11704#endif
11705
11706extern __inline __m128
11707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708_mm_cvtu32_ss (__m128 __A, unsigned __B)
11709{
11710 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11711 _MM_FROUND_CUR_DIRECTION);
11712}
11713
11714extern __inline __m512
11715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716_mm512_cvtepi32_ps (__m512i __A)
11717{
11718 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11719 (__v16sf)
11720 _mm512_setzero_ps (),
11721 (__mmask16) -1,
11722 _MM_FROUND_CUR_DIRECTION);
11723}
11724
11725extern __inline __m512
11726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11728{
11729 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11730 (__v16sf) __W,
11731 (__mmask16) __U,
11732 _MM_FROUND_CUR_DIRECTION);
11733}
11734
11735extern __inline __m512
11736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11738{
11739 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11740 (__v16sf)
11741 _mm512_setzero_ps (),
11742 (__mmask16) __U,
11743 _MM_FROUND_CUR_DIRECTION);
11744}
11745
11746extern __inline __m512
11747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11748_mm512_cvtepu32_ps (__m512i __A)
11749{
11750 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11751 (__v16sf)
11752 _mm512_setzero_ps (),
11753 (__mmask16) -1,
11754 _MM_FROUND_CUR_DIRECTION);
11755}
11756
11757extern __inline __m512
11758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11759_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11760{
11761 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11762 (__v16sf) __W,
11763 (__mmask16) __U,
11764 _MM_FROUND_CUR_DIRECTION);
11765}
11766
11767extern __inline __m512
11768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11769_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
11770{
11771 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11772 (__v16sf)
11773 _mm512_setzero_ps (),
11774 (__mmask16) __U,
11775 _MM_FROUND_CUR_DIRECTION);
11776}
11777
11778#ifdef __OPTIMIZE__
11779extern __inline __m512d
11780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11781_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
11782{
11783 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
11784 (__v8df) __B,
11785 (__v8di) __C,
11786 __imm,
11787 (__mmask8) -1,
11788 _MM_FROUND_CUR_DIRECTION);
11789}
11790
11791extern __inline __m512d
11792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
11794 __m512i __C, const int __imm)
11795{
11796 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
11797 (__v8df) __B,
11798 (__v8di) __C,
11799 __imm,
11800 (__mmask8) __U,
11801 _MM_FROUND_CUR_DIRECTION);
11802}
11803
11804extern __inline __m512d
11805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
11807 __m512i __C, const int __imm)
11808{
11809 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
11810 (__v8df) __B,
11811 (__v8di) __C,
11812 __imm,
11813 (__mmask8) __U,
11814 _MM_FROUND_CUR_DIRECTION);
11815}
11816
11817extern __inline __m512
11818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11819_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
11820{
11821 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
11822 (__v16sf) __B,
11823 (__v16si) __C,
11824 __imm,
11825 (__mmask16) -1,
11826 _MM_FROUND_CUR_DIRECTION);
11827}
11828
11829extern __inline __m512
11830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11831_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
11832 __m512i __C, const int __imm)
11833{
11834 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
11835 (__v16sf) __B,
11836 (__v16si) __C,
11837 __imm,
11838 (__mmask16) __U,
11839 _MM_FROUND_CUR_DIRECTION);
11840}
11841
11842extern __inline __m512
11843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11844_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
11845 __m512i __C, const int __imm)
11846{
11847 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
11848 (__v16sf) __B,
11849 (__v16si) __C,
11850 __imm,
11851 (__mmask16) __U,
11852 _MM_FROUND_CUR_DIRECTION);
11853}
11854
11855extern __inline __m128d
11856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
11858{
11859 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
11860 (__v2df) __B,
11861 (__v2di) __C, __imm,
11862 (__mmask8) -1,
11863 _MM_FROUND_CUR_DIRECTION);
11864}
11865
11866extern __inline __m128d
11867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
11869 __m128i __C, const int __imm)
11870{
11871 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
11872 (__v2df) __B,
11873 (__v2di) __C, __imm,
11874 (__mmask8) __U,
11875 _MM_FROUND_CUR_DIRECTION);
11876}
11877
11878extern __inline __m128d
11879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11880_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
11881 __m128i __C, const int __imm)
11882{
11883 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
11884 (__v2df) __B,
11885 (__v2di) __C,
11886 __imm,
11887 (__mmask8) __U,
11888 _MM_FROUND_CUR_DIRECTION);
11889}
11890
11891extern __inline __m128
11892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11893_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
11894{
11895 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
11896 (__v4sf) __B,
11897 (__v4si) __C, __imm,
11898 (__mmask8) -1,
11899 _MM_FROUND_CUR_DIRECTION);
11900}
11901
11902extern __inline __m128
11903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
11905 __m128i __C, const int __imm)
11906{
11907 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
11908 (__v4sf) __B,
11909 (__v4si) __C, __imm,
11910 (__mmask8) __U,
11911 _MM_FROUND_CUR_DIRECTION);
11912}
11913
11914extern __inline __m128
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
11917 __m128i __C, const int __imm)
11918{
11919 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
11920 (__v4sf) __B,
11921 (__v4si) __C, __imm,
11922 (__mmask8) __U,
11923 _MM_FROUND_CUR_DIRECTION);
11924}
11925#else
/* Macro form of _mm512_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  Passes an all-ones __mmask8.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (__v8df) (__m512d) (Y), \
      (__v8di) (__m512i) (Z), \
      (int) (C), \
      (__mmask8) (-1), \
      _MM_FROUND_CUR_DIRECTION))
11930
/* Macro form of _mm512_mask_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  U is passed as the __mmask8 operand.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (__v8df) (__m512d) (Y), \
      (__v8di) (__m512i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11935
/* Macro form of _mm512_maskz_fixupimm_pd, used when __OPTIMIZE__ is not
   defined.  Expands to the _maskz builtin with U as the mask.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
      (__v8df) (__m512d) (X), \
      (__v8df) (__m512d) (Y), \
      (__v8di) (__m512i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11940
/* Macro form of _mm512_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  Passes an all-ones __mmask16.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
      (__v16sf) (__m512) (X), \
      (__v16sf) (__m512) (Y), \
      (__v16si) (__m512i) (Z), \
      (int) (C), \
      (__mmask16) (-1), \
      _MM_FROUND_CUR_DIRECTION))
11945
/* Macro form of _mm512_mask_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  U is passed as the __mmask16 operand.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
      (__v16sf) (__m512) (X), \
      (__v16sf) (__m512) (Y), \
      (__v16si) (__m512i) (Z), \
      (int) (C), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
11950
/* Macro form of _mm512_maskz_fixupimm_ps, used when __OPTIMIZE__ is not
   defined.  Expands to the _maskz builtin with U as the mask.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
      (__v16sf) (__m512) (X), \
      (__v16sf) (__m512) (Y), \
      (__v16si) (__m512i) (Z), \
      (int) (C), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
11955
/* Macro form of _mm_fixupimm_sd, used when __OPTIMIZE__ is not defined.
   Passes an all-ones __mmask8.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (__v2di) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (-1), \
      _MM_FROUND_CUR_DIRECTION))
11960
/* Macro form of _mm_mask_fixupimm_sd, used when __OPTIMIZE__ is not
   defined.  U is passed as the __mmask8 operand.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (__v2di) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11965
/* Macro form of _mm_maskz_fixupimm_sd, used when __OPTIMIZE__ is not
   defined.  Expands to the _maskz builtin with U as the mask.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (__v2di) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11970
/* Macro form of _mm_fixupimm_ss, used when __OPTIMIZE__ is not defined.
   Passes an all-ones __mmask8.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (__v4si) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (-1), \
      _MM_FROUND_CUR_DIRECTION))
11975
/* Macro form of _mm_mask_fixupimm_ss, used when __OPTIMIZE__ is not
   defined.  U is passed as the __mmask8 operand.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (__v4si) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11980
/* Macro form of _mm_maskz_fixupimm_ss, used when __OPTIMIZE__ is not
   defined.  Expands to the _maskz builtin with U as the mask.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (__v4si) (__m128i) (Z), \
      (int) (C), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
11985#endif
11986
11987#ifdef __x86_64__
11988extern __inline unsigned long long
11989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990_mm_cvtss_u64 (__m128 __A)
11991{
11992 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
11993 __A,
11994 _MM_FROUND_CUR_DIRECTION);
11995}
11996
11997extern __inline unsigned long long
11998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999_mm_cvttss_u64 (__m128 __A)
12000{
12001 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12002 __A,
12003 _MM_FROUND_CUR_DIRECTION);
12004}
12005
12006extern __inline long long
12007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008_mm_cvttss_i64 (__m128 __A)
12009{
12010 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12011 _MM_FROUND_CUR_DIRECTION);
12012}
12013#endif /* __x86_64__ */
12014
12015extern __inline unsigned
12016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12017_mm_cvtss_u32 (__m128 __A)
12018{
12019 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12020 _MM_FROUND_CUR_DIRECTION);
12021}
12022
12023extern __inline unsigned
12024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025_mm_cvttss_u32 (__m128 __A)
12026{
12027 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12028 _MM_FROUND_CUR_DIRECTION);
12029}
12030
12031extern __inline int
12032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12033_mm_cvttss_i32 (__m128 __A)
12034{
12035 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12036 _MM_FROUND_CUR_DIRECTION);
12037}
12038
12039#ifdef __x86_64__
12040extern __inline unsigned long long
12041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12042_mm_cvtsd_u64 (__m128d __A)
12043{
12044 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12045 __A,
12046 _MM_FROUND_CUR_DIRECTION);
12047}
12048
12049extern __inline unsigned long long
12050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12051_mm_cvttsd_u64 (__m128d __A)
12052{
12053 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12054 __A,
12055 _MM_FROUND_CUR_DIRECTION);
12056}
12057
12058extern __inline long long
12059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060_mm_cvttsd_i64 (__m128d __A)
12061{
12062 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12063 _MM_FROUND_CUR_DIRECTION);
12064}
12065#endif /* __x86_64__ */
12066
12067extern __inline unsigned
12068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12069_mm_cvtsd_u32 (__m128d __A)
12070{
12071 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12072 _MM_FROUND_CUR_DIRECTION);
12073}
12074
12075extern __inline unsigned
12076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12077_mm_cvttsd_u32 (__m128d __A)
12078{
12079 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12080 _MM_FROUND_CUR_DIRECTION);
12081}
12082
12083extern __inline int
12084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12085_mm_cvttsd_i32 (__m128d __A)
12086{
12087 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12088 _MM_FROUND_CUR_DIRECTION);
12089}
12090
12091extern __inline __m512d
12092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093_mm512_cvtps_pd (__m256 __A)
12094{
12095 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12096 (__v8df)
12097 _mm512_setzero_pd (),
12098 (__mmask8) -1,
12099 _MM_FROUND_CUR_DIRECTION);
12100}
12101
12102extern __inline __m512d
12103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12104_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12105{
12106 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12107 (__v8df) __W,
12108 (__mmask8) __U,
12109 _MM_FROUND_CUR_DIRECTION);
12110}
12111
12112extern __inline __m512d
12113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12114_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12115{
12116 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12117 (__v8df)
12118 _mm512_setzero_pd (),
12119 (__mmask8) __U,
12120 _MM_FROUND_CUR_DIRECTION);
12121}
12122
12123extern __inline __m512
12124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12125_mm512_cvtph_ps (__m256i __A)
12126{
12127 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12128 (__v16sf)
12129 _mm512_setzero_ps (),
12130 (__mmask16) -1,
12131 _MM_FROUND_CUR_DIRECTION);
12132}
12133
12134extern __inline __m512
12135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12137{
12138 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12139 (__v16sf) __W,
12140 (__mmask16) __U,
12141 _MM_FROUND_CUR_DIRECTION);
12142}
12143
12144extern __inline __m512
12145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12146_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12147{
12148 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12149 (__v16sf)
12150 _mm512_setzero_ps (),
12151 (__mmask16) __U,
12152 _MM_FROUND_CUR_DIRECTION);
12153}
12154
12155extern __inline __m256
12156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157_mm512_cvtpd_ps (__m512d __A)
12158{
12159 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12160 (__v8sf)
12161 _mm256_setzero_ps (),
12162 (__mmask8) -1,
12163 _MM_FROUND_CUR_DIRECTION);
12164}
12165
12166extern __inline __m256
12167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12168_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12169{
12170 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12171 (__v8sf) __W,
12172 (__mmask8) __U,
12173 _MM_FROUND_CUR_DIRECTION);
12174}
12175
12176extern __inline __m256
12177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12178_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12179{
12180 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12181 (__v8sf)
12182 _mm256_setzero_ps (),
12183 (__mmask8) __U,
12184 _MM_FROUND_CUR_DIRECTION);
12185}
12186
12187#ifdef __OPTIMIZE__
12188extern __inline __m512
12189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190_mm512_getexp_ps (__m512 __A)
12191{
12192 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12193 (__v16sf)
12194 _mm512_setzero_ps (),
12195 (__mmask16) -1,
12196 _MM_FROUND_CUR_DIRECTION);
12197}
12198
12199extern __inline __m512
12200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12202{
12203 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12204 (__v16sf) __W,
12205 (__mmask16) __U,
12206 _MM_FROUND_CUR_DIRECTION);
12207}
12208
12209extern __inline __m512
12210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12211_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12212{
12213 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12214 (__v16sf)
12215 _mm512_setzero_ps (),
12216 (__mmask16) __U,
12217 _MM_FROUND_CUR_DIRECTION);
12218}
12219
12220extern __inline __m512d
12221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12222_mm512_getexp_pd (__m512d __A)
12223{
12224 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12225 (__v8df)
12226 _mm512_setzero_pd (),
12227 (__mmask8) -1,
12228 _MM_FROUND_CUR_DIRECTION);
12229}
12230
12231extern __inline __m512d
12232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12233_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12234{
12235 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12236 (__v8df) __W,
12237 (__mmask8) __U,
12238 _MM_FROUND_CUR_DIRECTION);
12239}
12240
12241extern __inline __m512d
12242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12243_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12244{
12245 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12246 (__v8df)
12247 _mm512_setzero_pd (),
12248 (__mmask8) __U,
12249 _MM_FROUND_CUR_DIRECTION);
12250}
12251
075691af
AI
12252extern __inline __m128
12253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12254_mm_getexp_ss (__m128 __A, __m128 __B)
12255{
12256 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12257 (__v4sf) __B,
12258 _MM_FROUND_CUR_DIRECTION);
12259}
12260
12261extern __inline __m128d
12262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12263_mm_getexp_sd (__m128d __A, __m128d __B)
12264{
12265 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12266 (__v2df) __B,
12267 _MM_FROUND_CUR_DIRECTION);
12268}
12269
756c5857
AI
12270extern __inline __m512d
12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12273 _MM_MANTISSA_SIGN_ENUM __C)
12274{
12275 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12276 (__C << 2) | __B,
12277 _mm512_setzero_pd (),
12278 (__mmask8) -1,
12279 _MM_FROUND_CUR_DIRECTION);
12280}
12281
12282extern __inline __m512d
12283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12284_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12285 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12286{
12287 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12288 (__C << 2) | __B,
12289 (__v8df) __W, __U,
12290 _MM_FROUND_CUR_DIRECTION);
12291}
12292
12293extern __inline __m512d
12294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12295_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12296 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12297{
12298 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12299 (__C << 2) | __B,
12300 (__v8df)
12301 _mm512_setzero_pd (),
12302 __U,
12303 _MM_FROUND_CUR_DIRECTION);
12304}
12305
12306extern __inline __m512
12307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12308_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12309 _MM_MANTISSA_SIGN_ENUM __C)
12310{
12311 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12312 (__C << 2) | __B,
12313 _mm512_setzero_ps (),
12314 (__mmask16) -1,
12315 _MM_FROUND_CUR_DIRECTION);
12316}
12317
12318extern __inline __m512
12319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12320_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12321 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12322{
12323 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12324 (__C << 2) | __B,
12325 (__v16sf) __W, __U,
12326 _MM_FROUND_CUR_DIRECTION);
12327}
12328
12329extern __inline __m512
12330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12332 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12333{
12334 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12335 (__C << 2) | __B,
12336 (__v16sf)
12337 _mm512_setzero_ps (),
12338 __U,
12339 _MM_FROUND_CUR_DIRECTION);
12340}
12341
075691af
AI
12342extern __inline __m128d
12343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12344_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12345 _MM_MANTISSA_SIGN_ENUM __D)
12346{
12347 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12348 (__v2df) __B,
12349 (__D << 2) | __C,
12350 _MM_FROUND_CUR_DIRECTION);
12351}
12352
12353extern __inline __m128
12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12356 _MM_MANTISSA_SIGN_ENUM __D)
12357{
12358 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12359 (__v4sf) __B,
12360 (__D << 2) | __C,
12361 _MM_FROUND_CUR_DIRECTION);
12362}
12363
756c5857
AI
12364#else
/* Macro form of _mm512_getmant_pd, used when __OPTIMIZE__ is not defined.
   The immediate is built as ((C) << 2) | (B); the mask is all ones.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_setzero_pd (), \
      (__mmask8) -1, \
      _MM_FROUND_CUR_DIRECTION))
12371
/* Macro form of _mm512_mask_getmant_pd, used when __OPTIMIZE__ is not
   defined.  W is the third builtin operand and U the mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) (W), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
12378
/* Macro form of _mm512_maskz_getmant_pd, used when __OPTIMIZE__ is not
   defined.  A zero vector is the third builtin operand and U the mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df) (__m512d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8df) (__m512d) _mm512_setzero_pd (), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getmant_ps, used when __OPTIMIZE__ is not defined.
   The immediate is built as ((C) << 2) | (B); the mask is all ones.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_setzero_ps (), \
      (__mmask16) -1, \
      _MM_FROUND_CUR_DIRECTION))
12391
/* Macro form of _mm512_mask_getmant_ps, used when __OPTIMIZE__ is not
   defined.  W is the third builtin operand and U the mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) (W), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
12398
/* Macro form of _mm512_maskz_getmant_ps, used when __OPTIMIZE__ is not
   defined.  A zero vector is the third builtin operand and U the mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf) (__m512) (X), \
      (int) (((C) << 2) | (B)), \
      (__v16sf) (__m512) _mm512_setzero_ps (), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro form of _mm_getmant_sd, used when __OPTIMIZE__ is not defined.
   The immediate is built as ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (int) (((D) << 2) | (C)), \
      _MM_FROUND_CUR_DIRECTION))
12410
/* Macro form of _mm_getmant_ss, used when __OPTIMIZE__ is not defined.
   The immediate is built as ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (int) (((D) << 2) | (C)), \
      _MM_FROUND_CUR_DIRECTION))
12416
/* Macro form of _mm_getexp_ss, used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_getexpss128_round, the same three-operand
   builtin that the inline definition of _mm_getexp_ss calls, so both
   build modes resolve to one builtin.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
      (__v4sf) (__m128) (A), \
      (__v4sf) (__m128) (B), \
      _MM_FROUND_CUR_DIRECTION))
12420
/* Macro form of _mm_getexp_sd, used when __OPTIMIZE__ is not defined.
   Expands to __builtin_ia32_getexpsd128_round, the same three-operand
   builtin that the inline definition of _mm_getexp_sd calls, so both
   build modes resolve to one builtin.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
      (__v2df) (__m128d) (A), \
      (__v2df) (__m128d) (B), \
      _MM_FROUND_CUR_DIRECTION))
12424
756c5857
AI
/* Macro form of _mm512_getexp_ps, used when __OPTIMIZE__ is not defined.
   Passes a zero vector and an all-ones __mmask16.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) -1, \
      _MM_FROUND_CUR_DIRECTION))
12428
/* Macro form of _mm512_mask_getexp_ps, used when __OPTIMIZE__ is not
   defined.  W is the second builtin operand and U the mask.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
12432
/* Macro form of _mm512_maskz_getexp_ps, used when __OPTIMIZE__ is not
   defined.  A zero vector is the second builtin operand and U the mask.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), \
      _MM_FROUND_CUR_DIRECTION))
12436
/* Macro form of _mm512_getexp_pd, used when __OPTIMIZE__ is not defined.
   Passes a zero vector and an all-ones __mmask8.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), \
      (__mmask8) -1, \
      _MM_FROUND_CUR_DIRECTION))
12440
/* Macro form of _mm512_mask_getexp_pd, used when __OPTIMIZE__ is not
   defined.  W is the second builtin operand and U the mask.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
12444
/* Macro form of _mm512_maskz_getexp_pd, used when __OPTIMIZE__ is not
   defined.  A zero vector is the second builtin operand and U the mask.  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), \
      _MM_FROUND_CUR_DIRECTION))
12448#endif
12449
12450#ifdef __OPTIMIZE__
12451extern __inline __m512
12452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12453_mm512_roundscale_ps (__m512 __A, const int __imm)
12454{
12455 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12456 (__v16sf) __A, -1,
12457 _MM_FROUND_CUR_DIRECTION);
12458}
12459
12460extern __inline __m512
12461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12462_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12463 const int __imm)
12464{
12465 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12466 (__v16sf) __A,
12467 (__mmask16) __B,
12468 _MM_FROUND_CUR_DIRECTION);
12469}
12470
12471extern __inline __m512
12472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12473_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12474{
12475 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12476 __imm,
12477 (__v16sf)
12478 _mm512_setzero_ps (),
12479 (__mmask16) __A,
12480 _MM_FROUND_CUR_DIRECTION);
12481}
12482
12483extern __inline __m512d
12484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12485_mm512_roundscale_pd (__m512d __A, const int __imm)
12486{
12487 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12488 (__v8df) __A, -1,
12489 _MM_FROUND_CUR_DIRECTION);
12490}
12491
12492extern __inline __m512d
12493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12494_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12495 const int __imm)
12496{
12497 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12498 (__v8df) __A,
12499 (__mmask8) __B,
12500 _MM_FROUND_CUR_DIRECTION);
12501}
12502
12503extern __inline __m512d
12504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12506{
12507 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12508 __imm,
12509 (__v8df)
12510 _mm512_setzero_pd (),
12511 (__mmask8) __A,
12512 _MM_FROUND_CUR_DIRECTION);
12513}
12514
075691af
AI
12515extern __inline __m128
12516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12517_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12518{
12519 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12520 (__v4sf) __B, __imm,
12521 _MM_FROUND_CUR_DIRECTION);
12522}
12523
12524extern __inline __m128d
12525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12526_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12527{
12528 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12529 (__v2df) __B, __imm,
12530 _MM_FROUND_CUR_DIRECTION);
12531}
12532
756c5857
AI
12533#else
/* Macro form of _mm512_roundscale_ps for the #else branch of the
   enclosing conditional.  V is the vector operand and IMM the immediate;
   V is expanded twice (as source and as third operand) and the mask is
   (__mmask16)(-1).  */
#define _mm512_roundscale_ps(V, IMM)                                    \
  ((__m512)                                                             \
   __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),                \
                                   (int)(IMM),                          \
                                   (__v16sf)(__m512)(V),                \
                                   (__mmask16)(-1),                     \
                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_ps for the #else branch of the
   enclosing conditional.  W is passed as the third operand of
   __builtin_ia32_rndscaleps_mask, U as the mask, V as the source and IMM
   as the immediate.  */
#define _mm512_mask_roundscale_ps(W, U, V, IMM)                         \
  ((__m512)                                                             \
   __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),                \
                                   (int)(IMM),                          \
                                   (__v16sf)(__m512)(W),                \
                                   (__mmask16)(U),                      \
                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_ps for the #else branch of the
   enclosing conditional.  U is the mask, V the source and IMM the
   immediate; the third operand is the result of _mm512_setzero_ps ().  */
#define _mm512_maskz_roundscale_ps(U, V, IMM)                           \
  ((__m512)                                                             \
   __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),                \
                                   (int)(IMM),                          \
                                   (__v16sf)_mm512_setzero_ps(),        \
                                   (__mmask16)(U),                      \
                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_roundscale_pd for the #else branch of the
   enclosing conditional.  V is the vector operand and IMM the immediate;
   V is expanded twice (as source and as third operand) and the mask is
   (__mmask8)(-1).  */
#define _mm512_roundscale_pd(V, IMM)                                    \
  ((__m512d)                                                            \
   __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),                \
                                   (int)(IMM),                          \
                                   (__v8df)(__m512d)(V),                \
                                   (__mmask8)(-1),                      \
                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_pd for the #else branch of the
   enclosing conditional.  W is passed as the third operand of
   __builtin_ia32_rndscalepd_mask, U as the mask, V as the source and IMM
   as the immediate.  */
#define _mm512_mask_roundscale_pd(W, U, V, IMM)                         \
  ((__m512d)                                                            \
   __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),                \
                                   (int)(IMM),                          \
                                   (__v8df)(__m512d)(W),                \
                                   (__mmask8)(U),                       \
                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_pd for the #else branch of the
   enclosing conditional.  U is the mask, V the source and IMM the
   immediate; the third operand is the result of _mm512_setzero_pd ().  */
#define _mm512_maskz_roundscale_pd(U, V, IMM)                           \
  ((__m512d)                                                            \
   __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),                \
                                   (int)(IMM),                          \
                                   (__v8df)_mm512_setzero_pd(),         \
                                   (__mmask8)(U),                       \
                                   _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro form of _mm_roundscale_ss for the #else branch of the enclosing
   conditional.  V1 and V2 are the two __m128 operands and IMM the
   immediate passed to __builtin_ia32_rndscaless_round.  */
#define _mm_roundscale_ss(V1, V2, IMM)                                  \
  ((__m128)                                                             \
   __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(V1),               \
                                    (__v4sf)(__m128)(V2),               \
                                    (int)(IMM),                         \
                                    _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_sd for the #else branch of the enclosing
   conditional.  V1 and V2 are the two __m128d operands and IMM the
   immediate passed to __builtin_ia32_rndscalesd_round.  */
#define _mm_roundscale_sd(V1, V2, IMM)                                  \
  ((__m128d)                                                            \
   __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(V1),              \
                                    (__v2df)(__m128d)(V2),              \
                                    (int)(IMM),                         \
                                    _MM_FROUND_CUR_DIRECTION))
756c5857
AI
12566#endif
12567
12568#ifdef __OPTIMIZE__
12569extern __inline __mmask8
12570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12571_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12572{
12573 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12574 (__v8df) __Y, __P,
12575 (__mmask8) -1,
12576 _MM_FROUND_CUR_DIRECTION);
12577}
12578
12579extern __inline __mmask16
12580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12581_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12582{
12583 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12584 (__v16sf) __Y, __P,
12585 (__mmask16) -1,
12586 _MM_FROUND_CUR_DIRECTION);
12587}
12588
12589extern __inline __mmask16
12590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12591_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12592{
12593 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12594 (__v16sf) __Y, __P,
12595 (__mmask16) __U,
12596 _MM_FROUND_CUR_DIRECTION);
12597}
12598
12599extern __inline __mmask8
12600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12601_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12602{
12603 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12604 (__v8df) __Y, __P,
12605 (__mmask8) __U,
12606 _MM_FROUND_CUR_DIRECTION);
12607}
12608
12609extern __inline __mmask8
12610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12611_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12612{
12613 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12614 (__v2df) __Y, __P,
12615 (__mmask8) -1,
12616 _MM_FROUND_CUR_DIRECTION);
12617}
12618
12619extern __inline __mmask8
12620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12621_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12622{
12623 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12624 (__v2df) __Y, __P,
12625 (__mmask8) __M,
12626 _MM_FROUND_CUR_DIRECTION);
12627}
12628
12629extern __inline __mmask8
12630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12631_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12632{
12633 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12634 (__v4sf) __Y, __P,
12635 (__mmask8) -1,
12636 _MM_FROUND_CUR_DIRECTION);
12637}
12638
12639extern __inline __mmask8
12640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12641_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12642{
12643 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12644 (__v4sf) __Y, __P,
12645 (__mmask8) __M,
12646 _MM_FROUND_CUR_DIRECTION);
12647}
12648
12649#else
/* Macro form of _mm512_cmp_pd_mask, used when __OPTIMIZE__ is not
   defined.  LHS and RHS are the __m512d operands and PRED the immediate
   passed to __builtin_ia32_cmppd512_mask; the mask is (__mmask8)-1.  */
#define _mm512_cmp_pd_mask(LHS, RHS, PRED)                              \
  ((__mmask8)                                                           \
   __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(LHS),                \
                                 (__v8df)(__m512d)(RHS),                \
                                 (int)(PRED),                           \
                                 (__mmask8)-1,                          \
                                 _MM_FROUND_CUR_DIRECTION))
12654
/* Macro form of _mm512_cmp_ps_mask, used when __OPTIMIZE__ is not
   defined.  LHS and RHS are the __m512 operands and PRED the immediate
   passed to __builtin_ia32_cmpps512_mask; the mask is (__mmask16)-1.  */
#define _mm512_cmp_ps_mask(LHS, RHS, PRED)                              \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(LHS),                \
                                 (__v16sf)(__m512)(RHS),                \
                                 (int)(PRED),                           \
                                 (__mmask16)-1,                         \
                                 _MM_FROUND_CUR_DIRECTION))
12659
/* Macro form of _mm512_mask_cmp_pd_mask, used when __OPTIMIZE__ is not
   defined.  Expands to a call of __builtin_ia32_cmppd512_mask on X and Y
   with immediate P and mask M.

   M is parenthesized before the __mmask8 cast so that the cast applies
   to the whole argument expression.  With `(__mmask8)M' an argument such
   as `k >> 1' would expand to `(__mmask8)k >> 1', truncating k before
   the shift instead of after it.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                             \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y),       \
                                            (int)(P),                   \
                                            (__mmask8)(M),              \
                                            _MM_FROUND_CUR_DIRECTION))
12664
/* Macro form of _mm512_mask_cmp_ps_mask, used when __OPTIMIZE__ is not
   defined.  Expands to a call of __builtin_ia32_cmpps512_mask on X and Y
   with immediate P and mask M.

   M is parenthesized before the __mmask16 cast so that the cast applies
   to the whole argument expression.  With `(__mmask16)M' an argument
   such as `k >> 1' would expand to `(__mmask16)k >> 1', truncating k
   before the shift instead of after it.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                             \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y),      \
                                             (int)(P),                  \
                                             (__mmask16)(M),            \
                                             _MM_FROUND_CUR_DIRECTION))
12669
/* Macro form of _mm_cmp_sd_mask, used when __OPTIMIZE__ is not defined.
   LHS and RHS are the __m128d operands and PRED the immediate passed to
   __builtin_ia32_cmpsd_mask; the mask is (__mmask8)-1.  */
#define _mm_cmp_sd_mask(LHS, RHS, PRED)                                 \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(LHS),                   \
                              (__v2df)(__m128d)(RHS),                   \
                              (int)(PRED),                              \
                              (__mmask8)-1,                             \
                              _MM_FROUND_CUR_DIRECTION))
12674
/* Macro form of _mm_mask_cmp_sd_mask, used when __OPTIMIZE__ is not
   defined.  Expands to a call of __builtin_ia32_cmpsd_mask on X and Y
   with immediate P and mask M.

   M is parenthesized and cast to __mmask8, matching the inline-function
   form (which casts its __M argument the same way) and the other
   mask-taking compare macros, so both forms hand the builtin an operand
   of the same type.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),          \
                                         (__v2df)(__m128d)(Y),          \
                                         (int)(P),                      \
                                         (__mmask8)(M),                 \
                                         _MM_FROUND_CUR_DIRECTION))
12679
/* Macro form of _mm_cmp_ss_mask, used when __OPTIMIZE__ is not defined.
   LHS and RHS are the __m128 operands and PRED the immediate passed to
   __builtin_ia32_cmpss_mask; the mask is (__mmask8)-1.  */
#define _mm_cmp_ss_mask(LHS, RHS, PRED)                                 \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(LHS),                    \
                              (__v4sf)(__m128)(RHS),                    \
                              (int)(PRED),                              \
                              (__mmask8)-1,                             \
                              _MM_FROUND_CUR_DIRECTION))
12684
/* Macro form of _mm_mask_cmp_ss_mask, used when __OPTIMIZE__ is not
   defined.  Expands to a call of __builtin_ia32_cmpss_mask on X and Y
   with immediate P and mask M.

   M is parenthesized and cast to __mmask8, matching the inline-function
   form (which casts its __M argument the same way) and the other
   mask-taking compare macros, so both forms hand the builtin an operand
   of the same type.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),           \
                                         (__v4sf)(__m128)(Y),           \
                                         (int)(P),                      \
                                         (__mmask8)(M),                 \
                                         _MM_FROUND_CUR_DIRECTION))
12689#endif
12690
/* __DISABLE_AVX512F__ is defined near the top of this header only when
   __AVX512F__ was not already defined, right after `#pragma GCC
   push_options' and `#pragma GCC target("avx512f")'.  In that case drop
   the marker macro again and pop the saved options.  */
12691#ifdef __DISABLE_AVX512F__
12692#undef __DISABLE_AVX512F__
12693#pragma GCC pop_options
12694#endif /* __DISABLE_AVX512F__ */
12695
12696#endif /* _AVX512FINTRIN_H_INCLUDED */