]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlintrin.h
re PR target/88794 (fixupimm intrinsics are unusable)
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
CommitLineData
a5544970 1/* Copyright (C) 2014-2019 Free Software Foundation, Inc.
936c0fe4
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
936c0fe4
AI
31#ifndef __AVX512VL__
32#pragma GCC push_options
33#pragma GCC target("avx512vl")
34#define __DISABLE_AVX512VL__
35#endif /* __AVX512VL__ */
36
37/* Internal data types for implementing the intrinsics. */
38typedef unsigned int __mmask32;
39
40extern __inline __m256d
41__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43{
44 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45 (__v4df) __W,
46 (__mmask8) __U);
47}
48
49extern __inline __m256d
50__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52{
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df)
55 _mm256_setzero_pd (),
56 (__mmask8) __U);
57}
58
59extern __inline __m128d
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62{
63 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64 (__v2df) __W,
65 (__mmask8) __U);
66}
67
68extern __inline __m128d
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71{
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df)
74 _mm_setzero_pd (),
75 (__mmask8) __U);
76}
77
78extern __inline __m256d
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81{
82 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83 (__v4df) __W,
84 (__mmask8) __U);
85}
86
87extern __inline __m256d
88__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90{
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df)
93 _mm256_setzero_pd (),
94 (__mmask8) __U);
95}
96
97extern __inline __m128d
98__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100{
101 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102 (__v2df) __W,
103 (__mmask8) __U);
104}
105
106extern __inline __m128d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109{
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df)
112 _mm_setzero_pd (),
113 (__mmask8) __U);
114}
115
116extern __inline void
117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119{
120 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121 (__v4df) __A,
122 (__mmask8) __U);
123}
124
125extern __inline void
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128{
129 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130 (__v2df) __A,
131 (__mmask8) __U);
132}
133
134extern __inline __m256
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137{
138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139 (__v8sf) __W,
140 (__mmask8) __U);
141}
142
143extern __inline __m256
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146{
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf)
149 _mm256_setzero_ps (),
150 (__mmask8) __U);
151}
152
153extern __inline __m128
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156{
157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158 (__v4sf) __W,
159 (__mmask8) __U);
160}
161
162extern __inline __m128
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165{
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf)
168 _mm_setzero_ps (),
169 (__mmask8) __U);
170}
171
172extern __inline __m256
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175{
176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177 (__v8sf) __W,
178 (__mmask8) __U);
179}
180
181extern __inline __m256
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184{
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf)
187 _mm256_setzero_ps (),
188 (__mmask8) __U);
189}
190
191extern __inline __m128
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194{
195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196 (__v4sf) __W,
197 (__mmask8) __U);
198}
199
200extern __inline __m128
201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203{
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf)
206 _mm_setzero_ps (),
207 (__mmask8) __U);
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213{
214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215 (__v8sf) __A,
216 (__mmask8) __U);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222{
223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224 (__v4sf) __A,
225 (__mmask8) __U);
226}
227
228extern __inline __m256i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231{
232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233 (__v4di) __W,
234 (__mmask8) __U);
235}
236
237extern __inline __m256i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240{
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di)
243 _mm256_setzero_si256 (),
244 (__mmask8) __U);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250{
251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252 (__v2di) __W,
253 (__mmask8) __U);
254}
255
256extern __inline __m128i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259{
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di)
a25a7887 262 _mm_setzero_si128 (),
936c0fe4
AI
263 (__mmask8) __U);
264}
265
266extern __inline __m256i
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269{
270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271 (__v4di) __W,
272 (__mmask8)
273 __U);
274}
275
276extern __inline __m256i
277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279{
280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281 (__v4di)
282 _mm256_setzero_si256 (),
283 (__mmask8)
284 __U);
285}
286
287extern __inline __m128i
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290{
291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292 (__v2di) __W,
293 (__mmask8)
294 __U);
295}
296
297extern __inline __m128i
298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300{
301 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302 (__v2di)
a25a7887 303 _mm_setzero_si128 (),
936c0fe4
AI
304 (__mmask8)
305 __U);
306}
307
308extern __inline void
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311{
312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313 (__v4di) __A,
314 (__mmask8) __U);
315}
316
317extern __inline void
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320{
321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322 (__v2di) __A,
323 (__mmask8) __U);
324}
325
326extern __inline __m256i
327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329{
330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331 (__v8si) __W,
332 (__mmask8) __U);
333}
334
335extern __inline __m256i
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338{
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si)
341 _mm256_setzero_si256 (),
342 (__mmask8) __U);
343}
344
345extern __inline __m128i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348{
349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350 (__v4si) __W,
351 (__mmask8) __U);
352}
353
354extern __inline __m128i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357{
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si)
360 _mm_setzero_si128 (),
361 (__mmask8) __U);
362}
363
364extern __inline __m256i
365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367{
368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369 (__v8si) __W,
370 (__mmask8)
371 __U);
372}
373
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377{
378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379 (__v8si)
380 _mm256_setzero_si256 (),
381 (__mmask8)
382 __U);
383}
384
385extern __inline __m128i
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388{
389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390 (__v4si) __W,
391 (__mmask8)
392 __U);
393}
394
395extern __inline __m128i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398{
399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400 (__v4si)
401 _mm_setzero_si128 (),
402 (__mmask8)
403 __U);
404}
405
406extern __inline void
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409{
410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411 (__v8si) __A,
412 (__mmask8) __U);
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418{
419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420 (__v4si) __A,
421 (__mmask8) __U);
422}
423
936c0fe4
AI
424extern __inline __m128d
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427{
428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429 (__v2df) __B,
430 (__v2df) __W,
431 (__mmask8) __U);
432}
433
434extern __inline __m128d
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437{
438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439 (__v2df) __B,
440 (__v2df)
441 _mm_setzero_pd (),
442 (__mmask8) __U);
443}
444
445extern __inline __m256d
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448 __m256d __B)
449{
450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451 (__v4df) __B,
452 (__v4df) __W,
453 (__mmask8) __U);
454}
455
456extern __inline __m256d
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459{
460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461 (__v4df) __B,
462 (__v4df)
463 _mm256_setzero_pd (),
464 (__mmask8) __U);
465}
466
467extern __inline __m128
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 469_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
470{
471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472 (__v4sf) __B,
473 (__v4sf) __W,
474 (__mmask8) __U);
475}
476
477extern __inline __m128
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 479_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
480{
481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482 (__v4sf) __B,
483 (__v4sf)
484 _mm_setzero_ps (),
485 (__mmask8) __U);
486}
487
488extern __inline __m256
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 490_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
491{
492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493 (__v8sf) __B,
494 (__v8sf) __W,
495 (__mmask8) __U);
496}
497
498extern __inline __m256
499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 500_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
501{
502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503 (__v8sf) __B,
504 (__v8sf)
505 _mm256_setzero_ps (),
506 (__mmask8) __U);
507}
508
509extern __inline __m128d
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512{
513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514 (__v2df) __B,
515 (__v2df) __W,
516 (__mmask8) __U);
517}
518
519extern __inline __m128d
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522{
523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524 (__v2df) __B,
525 (__v2df)
526 _mm_setzero_pd (),
527 (__mmask8) __U);
528}
529
530extern __inline __m256d
531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533 __m256d __B)
534{
535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536 (__v4df) __B,
537 (__v4df) __W,
538 (__mmask8) __U);
539}
540
541extern __inline __m256d
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544{
545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546 (__v4df) __B,
547 (__v4df)
548 _mm256_setzero_pd (),
549 (__mmask8) __U);
550}
551
552extern __inline __m128
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 554_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
555{
556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557 (__v4sf) __B,
558 (__v4sf) __W,
559 (__mmask8) __U);
560}
561
562extern __inline __m128
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 564_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
565{
566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567 (__v4sf) __B,
568 (__v4sf)
569 _mm_setzero_ps (),
570 (__mmask8) __U);
571}
572
573extern __inline __m256
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 575_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
576{
577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578 (__v8sf) __B,
579 (__v8sf) __W,
580 (__mmask8) __U);
581}
582
583extern __inline __m256
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 585_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
586{
587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588 (__v8sf) __B,
589 (__v8sf)
590 _mm256_setzero_ps (),
591 (__mmask8) __U);
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm256_store_epi64 (void *__P, __m256i __A)
597{
598 *(__m256i *) __P = __A;
599}
600
601extern __inline void
602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603_mm_store_epi64 (void *__P, __m128i __A)
604{
605 *(__m128i *) __P = __A;
606}
607
608extern __inline __m256d
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611{
fc9cf6da 612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
613 (__v4df) __W,
614 (__mmask8) __U);
615}
616
617extern __inline __m256d
618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620{
fc9cf6da 621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
622 (__v4df)
623 _mm256_setzero_pd (),
624 (__mmask8) __U);
625}
626
627extern __inline __m128d
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630{
fc9cf6da 631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
632 (__v2df) __W,
633 (__mmask8) __U);
634}
635
636extern __inline __m128d
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639{
fc9cf6da 640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
641 (__v2df)
642 _mm_setzero_pd (),
643 (__mmask8) __U);
644}
645
646extern __inline void
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649{
fc9cf6da 650 __builtin_ia32_storeupd256_mask ((double *) __P,
936c0fe4
AI
651 (__v4df) __A,
652 (__mmask8) __U);
653}
654
655extern __inline void
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658{
fc9cf6da 659 __builtin_ia32_storeupd128_mask ((double *) __P,
936c0fe4
AI
660 (__v2df) __A,
661 (__mmask8) __U);
662}
663
664extern __inline __m256
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667{
fc9cf6da 668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
669 (__v8sf) __W,
670 (__mmask8) __U);
671}
672
673extern __inline __m256
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676{
fc9cf6da 677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
678 (__v8sf)
679 _mm256_setzero_ps (),
680 (__mmask8) __U);
681}
682
683extern __inline __m128
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686{
fc9cf6da 687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
688 (__v4sf) __W,
689 (__mmask8) __U);
690}
691
692extern __inline __m128
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695{
fc9cf6da 696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
697 (__v4sf)
698 _mm_setzero_ps (),
699 (__mmask8) __U);
700}
701
702extern __inline void
703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705{
fc9cf6da 706 __builtin_ia32_storeups256_mask ((float *) __P,
936c0fe4
AI
707 (__v8sf) __A,
708 (__mmask8) __U);
709}
710
711extern __inline void
712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714{
fc9cf6da 715 __builtin_ia32_storeups128_mask ((float *) __P,
936c0fe4
AI
716 (__v4sf) __A,
717 (__mmask8) __U);
718}
719
720extern __inline __m256i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723{
fc9cf6da 724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
725 (__v4di) __W,
726 (__mmask8) __U);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732{
fc9cf6da 733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
734 (__v4di)
735 _mm256_setzero_si256 (),
736 (__mmask8) __U);
737}
738
739extern __inline __m128i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742{
fc9cf6da 743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
744 (__v2di) __W,
745 (__mmask8) __U);
746}
747
748extern __inline __m128i
749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751{
fc9cf6da 752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4 753 (__v2di)
a25a7887 754 _mm_setzero_si128 (),
936c0fe4
AI
755 (__mmask8) __U);
756}
757
758extern __inline void
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
761{
fc9cf6da 762 __builtin_ia32_storedqudi256_mask ((long long *) __P,
936c0fe4
AI
763 (__v4di) __A,
764 (__mmask8) __U);
765}
766
767extern __inline void
768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
770{
fc9cf6da 771 __builtin_ia32_storedqudi128_mask ((long long *) __P,
936c0fe4
AI
772 (__v2di) __A,
773 (__mmask8) __U);
774}
775
776extern __inline __m256i
777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
779{
fc9cf6da 780 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
781 (__v8si) __W,
782 (__mmask8) __U);
783}
784
785extern __inline __m256i
786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
788{
fc9cf6da 789 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
790 (__v8si)
791 _mm256_setzero_si256 (),
792 (__mmask8) __U);
793}
794
795extern __inline __m128i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
798{
fc9cf6da 799 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
800 (__v4si) __W,
801 (__mmask8) __U);
802}
803
804extern __inline __m128i
805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
806_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
807{
fc9cf6da 808 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
809 (__v4si)
810 _mm_setzero_si128 (),
811 (__mmask8) __U);
812}
813
814extern __inline void
815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
817{
fc9cf6da 818 __builtin_ia32_storedqusi256_mask ((int *) __P,
936c0fe4
AI
819 (__v8si) __A,
820 (__mmask8) __U);
821}
822
823extern __inline void
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
826{
fc9cf6da 827 __builtin_ia32_storedqusi128_mask ((int *) __P,
936c0fe4
AI
828 (__v4si) __A,
829 (__mmask8) __U);
830}
831
832extern __inline __m256i
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
835{
836 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
837 (__v8si) __W,
838 (__mmask8) __U);
839}
840
841extern __inline __m256i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
844{
845 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
846 (__v8si)
847 _mm256_setzero_si256 (),
848 (__mmask8) __U);
849}
850
851extern __inline __m128i
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
854{
855 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
856 (__v4si) __W,
857 (__mmask8) __U);
858}
859
860extern __inline __m128i
861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
863{
864 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
865 (__v4si)
866 _mm_setzero_si128 (),
867 (__mmask8) __U);
868}
869
870extern __inline __m256i
871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872_mm256_abs_epi64 (__m256i __A)
873{
874 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
875 (__v4di)
876 _mm256_setzero_si256 (),
877 (__mmask8) -1);
878}
879
880extern __inline __m256i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
883{
884 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
885 (__v4di) __W,
886 (__mmask8) __U);
887}
888
889extern __inline __m256i
890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
892{
893 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
894 (__v4di)
895 _mm256_setzero_si256 (),
896 (__mmask8) __U);
897}
898
899extern __inline __m128i
900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901_mm_abs_epi64 (__m128i __A)
902{
903 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
904 (__v2di)
a25a7887 905 _mm_setzero_si128 (),
936c0fe4
AI
906 (__mmask8) -1);
907}
908
909extern __inline __m128i
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
912{
913 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
914 (__v2di) __W,
915 (__mmask8) __U);
916}
917
918extern __inline __m128i
919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
921{
922 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
923 (__v2di)
a25a7887 924 _mm_setzero_si128 (),
936c0fe4
AI
925 (__mmask8) __U);
926}
927
928extern __inline __m128i
929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930_mm256_cvtpd_epu32 (__m256d __A)
931{
932 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
933 (__v4si)
934 _mm_setzero_si128 (),
935 (__mmask8) -1);
936}
937
938extern __inline __m128i
939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
941{
942 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
943 (__v4si) __W,
944 (__mmask8) __U);
945}
946
947extern __inline __m128i
948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
950{
951 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
952 (__v4si)
953 _mm_setzero_si128 (),
954 (__mmask8) __U);
955}
956
957extern __inline __m128i
958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959_mm_cvtpd_epu32 (__m128d __A)
960{
961 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
962 (__v4si)
963 _mm_setzero_si128 (),
964 (__mmask8) -1);
965}
966
967extern __inline __m128i
968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
970{
971 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
972 (__v4si) __W,
973 (__mmask8) __U);
974}
975
976extern __inline __m128i
977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
979{
980 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
981 (__v4si)
982 _mm_setzero_si128 (),
983 (__mmask8) __U);
984}
985
986extern __inline __m256i
987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
989{
990 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
991 (__v8si) __W,
992 (__mmask8) __U);
993}
994
995extern __inline __m256i
996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
998{
999 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1000 (__v8si)
1001 _mm256_setzero_si256 (),
1002 (__mmask8) __U);
1003}
1004
1005extern __inline __m128i
1006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1008{
1009 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1010 (__v4si) __W,
1011 (__mmask8) __U);
1012}
1013
1014extern __inline __m128i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1017{
1018 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1019 (__v4si)
1020 _mm_setzero_si128 (),
1021 (__mmask8) __U);
1022}
1023
1024extern __inline __m256i
1025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026_mm256_cvttps_epu32 (__m256 __A)
1027{
1028 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1029 (__v8si)
1030 _mm256_setzero_si256 (),
1031 (__mmask8) -1);
1032}
1033
1034extern __inline __m256i
1035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1037{
1038 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1039 (__v8si) __W,
1040 (__mmask8) __U);
1041}
1042
1043extern __inline __m256i
1044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1046{
1047 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1048 (__v8si)
1049 _mm256_setzero_si256 (),
1050 (__mmask8) __U);
1051}
1052
1053extern __inline __m128i
1054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055_mm_cvttps_epu32 (__m128 __A)
1056{
1057 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1058 (__v4si)
1059 _mm_setzero_si128 (),
1060 (__mmask8) -1);
1061}
1062
1063extern __inline __m128i
1064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1066{
1067 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1068 (__v4si) __W,
1069 (__mmask8) __U);
1070}
1071
1072extern __inline __m128i
1073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1075{
1076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1077 (__v4si)
1078 _mm_setzero_si128 (),
1079 (__mmask8) __U);
1080}
1081
1082extern __inline __m128i
1083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1085{
1086 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1087 (__v4si) __W,
1088 (__mmask8) __U);
1089}
1090
1091extern __inline __m128i
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1094{
1095 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1096 (__v4si)
1097 _mm_setzero_si128 (),
1098 (__mmask8) __U);
1099}
1100
1101extern __inline __m128i
1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1104{
1105 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1106 (__v4si) __W,
1107 (__mmask8) __U);
1108}
1109
1110extern __inline __m128i
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1113{
1114 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1115 (__v4si)
1116 _mm_setzero_si128 (),
1117 (__mmask8) __U);
1118}
1119
1120extern __inline __m128i
1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122_mm256_cvttpd_epu32 (__m256d __A)
1123{
1124 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1125 (__v4si)
1126 _mm_setzero_si128 (),
1127 (__mmask8) -1);
1128}
1129
1130extern __inline __m128i
1131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1133{
1134 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1135 (__v4si) __W,
1136 (__mmask8) __U);
1137}
1138
1139extern __inline __m128i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1142{
1143 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1144 (__v4si)
1145 _mm_setzero_si128 (),
1146 (__mmask8) __U);
1147}
1148
1149extern __inline __m128i
1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151_mm_cvttpd_epu32 (__m128d __A)
1152{
1153 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1154 (__v4si)
1155 _mm_setzero_si128 (),
1156 (__mmask8) -1);
1157}
1158
1159extern __inline __m128i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1162{
1163 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1164 (__v4si) __W,
1165 (__mmask8) __U);
1166}
1167
1168extern __inline __m128i
1169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1171{
1172 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1173 (__v4si)
1174 _mm_setzero_si128 (),
1175 (__mmask8) __U);
1176}
1177
1178extern __inline __m128i
1179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1181{
1182 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1183 (__v4si) __W,
1184 (__mmask8) __U);
1185}
1186
1187extern __inline __m128i
1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1190{
1191 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1192 (__v4si)
1193 _mm_setzero_si128 (),
1194 (__mmask8) __U);
1195}
1196
1197extern __inline __m128i
1198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1200{
1201 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1202 (__v4si) __W,
1203 (__mmask8) __U);
1204}
1205
1206extern __inline __m128i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1209{
1210 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1211 (__v4si)
1212 _mm_setzero_si128 (),
1213 (__mmask8) __U);
1214}
1215
1216extern __inline __m256d
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1219{
1220 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1221 (__v4df) __W,
1222 (__mmask8) __U);
1223}
1224
1225extern __inline __m256d
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1228{
1229 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1230 (__v4df)
1231 _mm256_setzero_pd (),
1232 (__mmask8) __U);
1233}
1234
1235extern __inline __m128d
1236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1238{
1239 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1240 (__v2df) __W,
1241 (__mmask8) __U);
1242}
1243
1244extern __inline __m128d
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1247{
1248 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1249 (__v2df)
1250 _mm_setzero_pd (),
1251 (__mmask8) __U);
1252}
1253
1254extern __inline __m256d
1255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256_mm256_cvtepu32_pd (__m128i __A)
1257{
1258 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1259 (__v4df)
1260 _mm256_setzero_pd (),
1261 (__mmask8) -1);
1262}
1263
1264extern __inline __m256d
1265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1267{
1268 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1269 (__v4df) __W,
1270 (__mmask8) __U);
1271}
1272
1273extern __inline __m256d
1274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1276{
1277 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1278 (__v4df)
1279 _mm256_setzero_pd (),
1280 (__mmask8) __U);
1281}
1282
1283extern __inline __m128d
1284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1285_mm_cvtepu32_pd (__m128i __A)
1286{
1287 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1288 (__v2df)
1289 _mm_setzero_pd (),
1290 (__mmask8) -1);
1291}
1292
1293extern __inline __m128d
1294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1296{
1297 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1298 (__v2df) __W,
1299 (__mmask8) __U);
1300}
1301
1302extern __inline __m128d
1303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1305{
1306 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1307 (__v2df)
1308 _mm_setzero_pd (),
1309 (__mmask8) __U);
1310}
1311
1312extern __inline __m256
1313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1315{
1316 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1317 (__v8sf) __W,
1318 (__mmask8) __U);
1319}
1320
1321extern __inline __m256
1322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1323_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
936c0fe4
AI
1324{
1325 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1326 (__v8sf)
1327 _mm256_setzero_ps (),
1328 (__mmask8) __U);
1329}
1330
1331extern __inline __m128
1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1334{
1335 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1336 (__v4sf) __W,
1337 (__mmask8) __U);
1338}
1339
1340extern __inline __m128
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1342_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
936c0fe4
AI
1343{
1344 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1345 (__v4sf)
1346 _mm_setzero_ps (),
1347 (__mmask8) __U);
1348}
1349
1350extern __inline __m256
1351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352_mm256_cvtepu32_ps (__m256i __A)
1353{
1354 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1355 (__v8sf)
1356 _mm256_setzero_ps (),
1357 (__mmask8) -1);
1358}
1359
1360extern __inline __m256
1361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1363{
1364 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1365 (__v8sf) __W,
1366 (__mmask8) __U);
1367}
1368
1369extern __inline __m256
1370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1372{
1373 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1374 (__v8sf)
1375 _mm256_setzero_ps (),
1376 (__mmask8) __U);
1377}
1378
1379extern __inline __m128
1380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381_mm_cvtepu32_ps (__m128i __A)
1382{
1383 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1384 (__v4sf)
1385 _mm_setzero_ps (),
1386 (__mmask8) -1);
1387}
1388
1389extern __inline __m128
1390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1392{
1393 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1394 (__v4sf) __W,
1395 (__mmask8) __U);
1396}
1397
1398extern __inline __m128
1399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1401{
1402 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1403 (__v4sf)
1404 _mm_setzero_ps (),
1405 (__mmask8) __U);
1406}
1407
1408extern __inline __m256d
1409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1411{
1412 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1413 (__v4df) __W,
1414 (__mmask8) __U);
1415}
1416
1417extern __inline __m256d
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1420{
1421 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1422 (__v4df)
1423 _mm256_setzero_pd (),
1424 (__mmask8) __U);
1425}
1426
1427extern __inline __m128d
1428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1430{
1431 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1432 (__v2df) __W,
1433 (__mmask8) __U);
1434}
1435
1436extern __inline __m128d
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1439{
1440 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1441 (__v2df)
1442 _mm_setzero_pd (),
1443 (__mmask8) __U);
1444}
1445
1446extern __inline __m128i
1447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448_mm_cvtepi32_epi8 (__m128i __A)
1449{
1450 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
a25a7887
JJ
1451 (__v16qi)
1452 _mm_undefined_si128 (),
936c0fe4
AI
1453 (__mmask8) -1);
1454}
1455
1456extern __inline void
1457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1459{
1460 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1461}
1462
1463extern __inline __m128i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1466{
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi) __O, __M);
1469}
1470
1471extern __inline __m128i
1472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1474{
1475 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1476 (__v16qi)
1477 _mm_setzero_si128 (),
1478 __M);
1479}
1480
1481extern __inline __m128i
1482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483_mm256_cvtepi32_epi8 (__m256i __A)
1484{
1485 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
a25a7887
JJ
1486 (__v16qi)
1487 _mm_undefined_si128 (),
936c0fe4
AI
1488 (__mmask8) -1);
1489}
1490
1491extern __inline __m128i
1492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1494{
1495 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1496 (__v16qi) __O, __M);
1497}
1498
1499extern __inline void
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1502{
1503 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1504}
1505
1506extern __inline __m128i
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1509{
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi)
1512 _mm_setzero_si128 (),
1513 __M);
1514}
1515
1516extern __inline __m128i
1517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518_mm_cvtsepi32_epi8 (__m128i __A)
1519{
1520 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
a25a7887
JJ
1521 (__v16qi)
1522 _mm_undefined_si128 (),
936c0fe4
AI
1523 (__mmask8) -1);
1524}
1525
1526extern __inline void
1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1529{
1530 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1531}
1532
1533extern __inline __m128i
1534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1536{
1537 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1538 (__v16qi) __O, __M);
1539}
1540
1541extern __inline __m128i
1542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1544{
1545 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1546 (__v16qi)
1547 _mm_setzero_si128 (),
1548 __M);
1549}
1550
1551extern __inline __m128i
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm256_cvtsepi32_epi8 (__m256i __A)
1554{
1555 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
a25a7887
JJ
1556 (__v16qi)
1557 _mm_undefined_si128 (),
936c0fe4
AI
1558 (__mmask8) -1);
1559}
1560
1561extern __inline void
1562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1564{
1565 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1566}
1567
1568extern __inline __m128i
1569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1571{
1572 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1573 (__v16qi) __O, __M);
1574}
1575
1576extern __inline __m128i
1577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1579{
1580 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1581 (__v16qi)
1582 _mm_setzero_si128 (),
1583 __M);
1584}
1585
1586extern __inline __m128i
1587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588_mm_cvtusepi32_epi8 (__m128i __A)
1589{
1590 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
a25a7887
JJ
1591 (__v16qi)
1592 _mm_undefined_si128 (),
936c0fe4
AI
1593 (__mmask8) -1);
1594}
1595
1596extern __inline void
1597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1599{
1600 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1601}
1602
1603extern __inline __m128i
1604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1606{
1607 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1608 (__v16qi) __O,
1609 __M);
1610}
1611
1612extern __inline __m128i
1613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1615{
1616 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1617 (__v16qi)
1618 _mm_setzero_si128 (),
1619 __M);
1620}
1621
1622extern __inline __m128i
1623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624_mm256_cvtusepi32_epi8 (__m256i __A)
1625{
1626 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
a25a7887
JJ
1627 (__v16qi)
1628 _mm_undefined_si128 (),
936c0fe4
AI
1629 (__mmask8) -1);
1630}
1631
1632extern __inline void
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1635{
1636 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1637}
1638
1639extern __inline __m128i
1640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1642{
1643 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1644 (__v16qi) __O,
1645 __M);
1646}
1647
1648extern __inline __m128i
1649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1651{
1652 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1653 (__v16qi)
1654 _mm_setzero_si128 (),
1655 __M);
1656}
1657
1658extern __inline __m128i
1659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660_mm_cvtepi32_epi16 (__m128i __A)
1661{
1662 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
a25a7887
JJ
1663 (__v8hi)
1664 _mm_setzero_si128 (),
936c0fe4
AI
1665 (__mmask8) -1);
1666}
1667
1668extern __inline void
1669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1671{
1672 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1673}
1674
1675extern __inline __m128i
1676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1678{
1679 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1680 (__v8hi) __O, __M);
1681}
1682
1683extern __inline __m128i
1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1686{
1687 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1688 (__v8hi)
1689 _mm_setzero_si128 (),
1690 __M);
1691}
1692
1693extern __inline __m128i
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm256_cvtepi32_epi16 (__m256i __A)
1696{
1697 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
a25a7887
JJ
1698 (__v8hi)
1699 _mm_setzero_si128 (),
936c0fe4
AI
1700 (__mmask8) -1);
1701}
1702
9ab4c07a 1703extern __inline void
936c0fe4
AI
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1706{
1707 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1708}
1709
1710extern __inline __m128i
1711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1713{
1714 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1715 (__v8hi) __O, __M);
1716}
1717
1718extern __inline __m128i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1721{
1722 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1723 (__v8hi)
1724 _mm_setzero_si128 (),
1725 __M);
1726}
1727
1728extern __inline __m128i
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm_cvtsepi32_epi16 (__m128i __A)
1731{
1732 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
a25a7887
JJ
1733 (__v8hi)
1734 _mm_setzero_si128 (),
936c0fe4
AI
1735 (__mmask8) -1);
1736}
1737
1738extern __inline void
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1741{
1742 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1743}
1744
1745extern __inline __m128i
1746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1748{
1749 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1750 (__v8hi)__O,
1751 __M);
1752}
1753
1754extern __inline __m128i
1755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1757{
1758 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 (__v8hi)
1760 _mm_setzero_si128 (),
1761 __M);
1762}
1763
1764extern __inline __m128i
1765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766_mm256_cvtsepi32_epi16 (__m256i __A)
1767{
1768 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
a25a7887
JJ
1769 (__v8hi)
1770 _mm_undefined_si128 (),
936c0fe4
AI
1771 (__mmask8) -1);
1772}
1773
1774extern __inline void
1775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1777{
1778 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1779}
1780
1781extern __inline __m128i
1782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1784{
1785 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1786 (__v8hi) __O, __M);
1787}
1788
1789extern __inline __m128i
1790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1792{
1793 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 (__v8hi)
1795 _mm_setzero_si128 (),
1796 __M);
1797}
1798
1799extern __inline __m128i
1800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801_mm_cvtusepi32_epi16 (__m128i __A)
1802{
1803 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
a25a7887
JJ
1804 (__v8hi)
1805 _mm_undefined_si128 (),
936c0fe4
AI
1806 (__mmask8) -1);
1807}
1808
9ab4c07a 1809extern __inline void
936c0fe4
AI
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1812{
1813 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1814}
1815
1816extern __inline __m128i
1817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1819{
1820 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1821 (__v8hi) __O, __M);
1822}
1823
1824extern __inline __m128i
1825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1827{
1828 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1829 (__v8hi)
1830 _mm_setzero_si128 (),
1831 __M);
1832}
1833
1834extern __inline __m128i
1835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836_mm256_cvtusepi32_epi16 (__m256i __A)
1837{
1838 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
a25a7887
JJ
1839 (__v8hi)
1840 _mm_undefined_si128 (),
936c0fe4
AI
1841 (__mmask8) -1);
1842}
1843
1844extern __inline void
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1847{
1848 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1849}
1850
1851extern __inline __m128i
1852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1854{
1855 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1856 (__v8hi) __O, __M);
1857}
1858
1859extern __inline __m128i
1860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1862{
1863 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1864 (__v8hi)
1865 _mm_setzero_si128 (),
1866 __M);
1867}
1868
1869extern __inline __m128i
1870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1871_mm_cvtepi64_epi8 (__m128i __A)
1872{
1873 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
a25a7887
JJ
1874 (__v16qi)
1875 _mm_undefined_si128 (),
936c0fe4
AI
1876 (__mmask8) -1);
1877}
1878
1879extern __inline void
1880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1882{
1883 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1884}
1885
1886extern __inline __m128i
1887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1889{
1890 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1891 (__v16qi) __O, __M);
1892}
1893
1894extern __inline __m128i
1895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1897{
1898 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1899 (__v16qi)
1900 _mm_setzero_si128 (),
1901 __M);
1902}
1903
1904extern __inline __m128i
1905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906_mm256_cvtepi64_epi8 (__m256i __A)
1907{
1908 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
a25a7887
JJ
1909 (__v16qi)
1910 _mm_undefined_si128 (),
936c0fe4
AI
1911 (__mmask8) -1);
1912}
1913
1914extern __inline void
1915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1917{
1918 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1919}
1920
1921extern __inline __m128i
1922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1924{
1925 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1926 (__v16qi) __O, __M);
1927}
1928
1929extern __inline __m128i
1930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1932{
1933 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1934 (__v16qi)
1935 _mm_setzero_si128 (),
1936 __M);
1937}
1938
1939extern __inline __m128i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm_cvtsepi64_epi8 (__m128i __A)
1942{
1943 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
a25a7887
JJ
1944 (__v16qi)
1945 _mm_undefined_si128 (),
936c0fe4
AI
1946 (__mmask8) -1);
1947}
1948
1949extern __inline void
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1952{
1953 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1954}
1955
1956extern __inline __m128i
1957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1959{
1960 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1961 (__v16qi) __O, __M);
1962}
1963
1964extern __inline __m128i
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1967{
1968 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1969 (__v16qi)
1970 _mm_setzero_si128 (),
1971 __M);
1972}
1973
1974extern __inline __m128i
1975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976_mm256_cvtsepi64_epi8 (__m256i __A)
1977{
1978 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
a25a7887
JJ
1979 (__v16qi)
1980 _mm_undefined_si128 (),
936c0fe4
AI
1981 (__mmask8) -1);
1982}
1983
1984extern __inline void
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1987{
1988 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1989}
1990
1991extern __inline __m128i
1992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1994{
1995 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1996 (__v16qi) __O, __M);
1997}
1998
1999extern __inline __m128i
2000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2002{
2003 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2004 (__v16qi)
2005 _mm_setzero_si128 (),
2006 __M);
2007}
2008
2009extern __inline __m128i
2010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011_mm_cvtusepi64_epi8 (__m128i __A)
2012{
2013 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
a25a7887
JJ
2014 (__v16qi)
2015 _mm_undefined_si128 (),
936c0fe4
AI
2016 (__mmask8) -1);
2017}
2018
2019extern __inline void
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022{
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024}
2025
2026extern __inline __m128i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029{
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2033}
2034
2035extern __inline __m128i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038{
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043}
2044
2045extern __inline __m128i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm256_cvtusepi64_epi8 (__m256i __A)
2048{
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
a25a7887
JJ
2050 (__v16qi)
2051 _mm_undefined_si128 (),
936c0fe4
AI
2052 (__mmask8) -1);
2053}
2054
2055extern __inline void
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2058{
2059 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2060}
2061
2062extern __inline __m128i
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2065{
2066 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 (__v16qi) __O,
2068 __M);
2069}
2070
2071extern __inline __m128i
2072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2074{
2075 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 (__v16qi)
2077 _mm_setzero_si128 (),
2078 __M);
2079}
2080
2081extern __inline __m128i
2082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083_mm_cvtepi64_epi16 (__m128i __A)
2084{
2085 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
a25a7887
JJ
2086 (__v8hi)
2087 _mm_undefined_si128 (),
936c0fe4
AI
2088 (__mmask8) -1);
2089}
2090
2091extern __inline void
2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2094{
2095 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2096}
2097
2098extern __inline __m128i
2099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2101{
2102 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2103 (__v8hi)__O,
2104 __M);
2105}
2106
2107extern __inline __m128i
2108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2110{
2111 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2112 (__v8hi)
2113 _mm_setzero_si128 (),
2114 __M);
2115}
2116
2117extern __inline __m128i
2118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119_mm256_cvtepi64_epi16 (__m256i __A)
2120{
2121 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
a25a7887
JJ
2122 (__v8hi)
2123 _mm_undefined_si128 (),
936c0fe4
AI
2124 (__mmask8) -1);
2125}
2126
2127extern __inline void
2128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2130{
2131 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2132}
2133
2134extern __inline __m128i
2135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2137{
2138 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2139 (__v8hi) __O, __M);
2140}
2141
2142extern __inline __m128i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2145{
2146 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2147 (__v8hi)
2148 _mm_setzero_si128 (),
2149 __M);
2150}
2151
2152extern __inline __m128i
2153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154_mm_cvtsepi64_epi16 (__m128i __A)
2155{
2156 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
a25a7887
JJ
2157 (__v8hi)
2158 _mm_undefined_si128 (),
936c0fe4
AI
2159 (__mmask8) -1);
2160}
2161
2162extern __inline void
2163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2165{
2166 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2167}
2168
2169extern __inline __m128i
2170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2172{
2173 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2174 (__v8hi) __O, __M);
2175}
2176
2177extern __inline __m128i
2178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2180{
2181 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2182 (__v8hi)
2183 _mm_setzero_si128 (),
2184 __M);
2185}
2186
2187extern __inline __m128i
2188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189_mm256_cvtsepi64_epi16 (__m256i __A)
2190{
2191 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
a25a7887
JJ
2192 (__v8hi)
2193 _mm_undefined_si128 (),
936c0fe4
AI
2194 (__mmask8) -1);
2195}
2196
2197extern __inline void
2198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2200{
2201 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2202}
2203
2204extern __inline __m128i
2205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2207{
2208 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2209 (__v8hi) __O, __M);
2210}
2211
2212extern __inline __m128i
2213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2215{
2216 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 (__v8hi)
2218 _mm_setzero_si128 (),
2219 __M);
2220}
2221
2222extern __inline __m128i
2223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224_mm_cvtusepi64_epi16 (__m128i __A)
2225{
2226 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
a25a7887
JJ
2227 (__v8hi)
2228 _mm_undefined_si128 (),
936c0fe4
AI
2229 (__mmask8) -1);
2230}
2231
2232extern __inline void
2233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2235{
2236 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2237}
2238
2239extern __inline __m128i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2242{
2243 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2244 (__v8hi) __O, __M);
2245}
2246
2247extern __inline __m128i
2248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2250{
2251 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2252 (__v8hi)
2253 _mm_setzero_si128 (),
2254 __M);
2255}
2256
2257extern __inline __m128i
2258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2259_mm256_cvtusepi64_epi16 (__m256i __A)
2260{
2261 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
a25a7887
JJ
2262 (__v8hi)
2263 _mm_undefined_si128 (),
936c0fe4
AI
2264 (__mmask8) -1);
2265}
2266
2267extern __inline void
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2270{
2271 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2272}
2273
2274extern __inline __m128i
2275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2277{
2278 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2279 (__v8hi) __O, __M);
2280}
2281
2282extern __inline __m128i
2283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2285{
2286 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2287 (__v8hi)
2288 _mm_setzero_si128 (),
2289 __M);
2290}
2291
2292extern __inline __m128i
2293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294_mm_cvtepi64_epi32 (__m128i __A)
2295{
2296 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
a25a7887
JJ
2297 (__v4si)
2298 _mm_undefined_si128 (),
936c0fe4
AI
2299 (__mmask8) -1);
2300}
2301
2302extern __inline void
2303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2305{
2306 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2307}
2308
2309extern __inline __m128i
2310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2312{
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si) __O, __M);
2315}
2316
2317extern __inline __m128i
2318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2320{
2321 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2322 (__v4si)
2323 _mm_setzero_si128 (),
2324 __M);
2325}
2326
2327extern __inline __m128i
2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329_mm256_cvtepi64_epi32 (__m256i __A)
2330{
2331 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
a25a7887
JJ
2332 (__v4si)
2333 _mm_undefined_si128 (),
936c0fe4
AI
2334 (__mmask8) -1);
2335}
2336
2337extern __inline void
2338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2340{
2341 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2342}
2343
2344extern __inline __m128i
2345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2347{
2348 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 (__v4si) __O, __M);
2350}
2351
2352extern __inline __m128i
2353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2355{
2356 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2357 (__v4si)
2358 _mm_setzero_si128 (),
2359 __M);
2360}
2361
2362extern __inline __m128i
2363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364_mm_cvtsepi64_epi32 (__m128i __A)
2365{
2366 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
a25a7887
JJ
2367 (__v4si)
2368 _mm_undefined_si128 (),
936c0fe4
AI
2369 (__mmask8) -1);
2370}
2371
9ab4c07a 2372extern __inline void
936c0fe4
AI
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2375{
2376 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2377}
2378
2379extern __inline __m128i
2380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2382{
2383 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2384 (__v4si) __O, __M);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2390{
2391 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2392 (__v4si)
2393 _mm_setzero_si128 (),
2394 __M);
2395}
2396
2397extern __inline __m128i
2398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399_mm256_cvtsepi64_epi32 (__m256i __A)
2400{
2401 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
a25a7887
JJ
2402 (__v4si)
2403 _mm_undefined_si128 (),
936c0fe4
AI
2404 (__mmask8) -1);
2405}
2406
2407extern __inline void
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2410{
2411 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2412}
2413
2414extern __inline __m128i
2415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2417{
2418 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2419 (__v4si)__O,
2420 __M);
2421}
2422
2423extern __inline __m128i
2424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2426{
2427 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2428 (__v4si)
2429 _mm_setzero_si128 (),
2430 __M);
2431}
2432
2433extern __inline __m128i
2434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435_mm_cvtusepi64_epi32 (__m128i __A)
2436{
2437 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
a25a7887
JJ
2438 (__v4si)
2439 _mm_undefined_si128 (),
936c0fe4
AI
2440 (__mmask8) -1);
2441}
2442
2443extern __inline void
2444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2446{
2447 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2448}
2449
2450extern __inline __m128i
2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2453{
2454 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 (__v4si) __O, __M);
2456}
2457
2458extern __inline __m128i
2459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2461{
2462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 (__v4si)
2464 _mm_setzero_si128 (),
2465 __M);
2466}
2467
2468extern __inline __m128i
2469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm256_cvtusepi64_epi32 (__m256i __A)
2471{
2472 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
a25a7887
JJ
2473 (__v4si)
2474 _mm_undefined_si128 (),
936c0fe4
AI
2475 (__mmask8) -1);
2476}
2477
2478extern __inline void
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2481{
2482 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2483}
2484
2485extern __inline __m128i
2486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2488{
2489 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2490 (__v4si) __O, __M);
2491}
2492
2493extern __inline __m128i
2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2496{
2497 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2498 (__v4si)
2499 _mm_setzero_si128 (),
2500 __M);
2501}
2502
2503extern __inline __m256
2504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2505_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2506{
2507 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2508 (__v8sf) __O,
2509 __M);
2510}
2511
2512extern __inline __m256
2513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2515{
2516 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2517 (__v8sf)
2518 _mm256_setzero_ps (),
2519 __M);
2520}
2521
2522extern __inline __m128
2523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2525{
2526 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2527 (__v4sf) __O,
2528 __M);
2529}
2530
2531extern __inline __m128
2532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2534{
2535 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2536 (__v4sf)
2537 _mm_setzero_ps (),
2538 __M);
2539}
2540
2541extern __inline __m256d
2542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2544{
2545 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2546 (__v4df) __O,
2547 __M);
2548}
2549
2550extern __inline __m256d
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2553{
2554 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2555 (__v4df)
2556 _mm256_setzero_pd (),
2557 __M);
2558}
2559
2560extern __inline __m256i
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2563{
2564 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2565 (__v8si) __O,
2566 __M);
2567}
2568
2569extern __inline __m256i
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2572{
2573 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2574 (__v8si)
2575 _mm256_setzero_si256 (),
2576 __M);
2577}
2578
2579extern __inline __m256i
2580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2582{
2583 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2584 __M);
2585}
2586
2587extern __inline __m256i
2588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2590{
2591 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2592 (__v8si)
2593 _mm256_setzero_si256 (),
2594 __M);
2595}
2596
2597extern __inline __m128i
2598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2600{
2601 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2602 (__v4si) __O,
2603 __M);
2604}
2605
2606extern __inline __m128i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2609{
2610 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2611 (__v4si)
2612 _mm_setzero_si128 (),
2613 __M);
2614}
2615
2616extern __inline __m128i
2617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2619{
2620 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2621 __M);
2622}
2623
2624extern __inline __m128i
2625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2627{
a25a7887
JJ
2628 return (__m128i)
2629 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2630 (__v4si) _mm_setzero_si128 (),
2631 __M);
936c0fe4
AI
2632}
2633
2634extern __inline __m256i
2635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2637{
2638 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2639 (__v4di) __O,
2640 __M);
2641}
2642
2643extern __inline __m256i
2644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2645_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2646{
2647 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2648 (__v4di)
2649 _mm256_setzero_si256 (),
2650 __M);
2651}
2652
2653extern __inline __m256i
2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2656{
936c0fe4
AI
2657 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 __M);
936c0fe4
AI
2659}
2660
2661extern __inline __m256i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2664{
936c0fe4
AI
2665 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2666 (__v4di)
2667 _mm256_setzero_si256 (),
2668 __M);
936c0fe4
AI
2669}
2670
2671extern __inline __m128i
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2674{
2675 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 (__v2di) __O,
2677 __M);
2678}
2679
2680extern __inline __m128i
2681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2683{
2684 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 (__v2di)
2686 _mm_setzero_si128 (),
2687 __M);
2688}
2689
2690extern __inline __m128i
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2693{
936c0fe4
AI
2694 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2695 __M);
936c0fe4
AI
2696}
2697
2698extern __inline __m128i
2699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2700_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2701{
a25a7887
JJ
2702 return (__m128i)
2703 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2704 (__v2di) _mm_setzero_si128 (),
2705 __M);
936c0fe4
AI
2706}
2707
2708extern __inline __m256
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm256_broadcast_f32x4 (__m128 __A)
2711{
2712 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2713 (__v8sf)_mm256_undefined_pd (),
c42b0bdf 2714 (__mmask8) -1);
936c0fe4
AI
2715}
2716
2717extern __inline __m256
2718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2720{
2721 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2722 (__v8sf) __O,
2723 __M);
2724}
2725
2726extern __inline __m256
2727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2729{
2730 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2731 (__v8sf)
2732 _mm256_setzero_ps (),
2733 __M);
2734}
2735
2736extern __inline __m256i
2737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738_mm256_broadcast_i32x4 (__m128i __A)
2739{
2740 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2741 __A,
2742 (__v8si)_mm256_undefined_si256 (),
c42b0bdf 2743 (__mmask8) -1);
936c0fe4
AI
2744}
2745
2746extern __inline __m256i
2747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2749{
2750 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2751 __A,
2752 (__v8si)
2753 __O, __M);
2754}
2755
2756extern __inline __m256i
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2759{
2760 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2761 __A,
2762 (__v8si)
2763 _mm256_setzero_si256 (),
2764 __M);
2765}
2766
2767extern __inline __m256i
2768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2770{
2771 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2772 (__v8si) __W,
2773 (__mmask8) __U);
2774}
2775
2776extern __inline __m256i
2777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2779{
2780 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2781 (__v8si)
2782 _mm256_setzero_si256 (),
2783 (__mmask8) __U);
2784}
2785
2786extern __inline __m128i
2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2789{
2790 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2791 (__v4si) __W,
2792 (__mmask8) __U);
2793}
2794
2795extern __inline __m128i
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2798{
2799 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2800 (__v4si)
2801 _mm_setzero_si128 (),
2802 (__mmask8) __U);
2803}
2804
2805extern __inline __m256i
2806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2808{
2809 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2810 (__v4di) __W,
2811 (__mmask8) __U);
2812}
2813
2814extern __inline __m256i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2817{
2818 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2819 (__v4di)
2820 _mm256_setzero_si256 (),
2821 (__mmask8) __U);
2822}
2823
2824extern __inline __m128i
2825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2827{
2828 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2829 (__v2di) __W,
2830 (__mmask8) __U);
2831}
2832
2833extern __inline __m128i
2834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2836{
2837 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2838 (__v2di)
2839 _mm_setzero_si128 (),
2840 (__mmask8) __U);
2841}
2842
2843extern __inline __m256i
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2846{
2847 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2848 (__v8si) __W,
2849 (__mmask8) __U);
2850}
2851
2852extern __inline __m256i
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2855{
2856 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2857 (__v8si)
2858 _mm256_setzero_si256 (),
2859 (__mmask8) __U);
2860}
2861
2862extern __inline __m128i
2863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2865{
2866 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2867 (__v4si) __W,
2868 (__mmask8) __U);
2869}
2870
2871extern __inline __m128i
2872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2874{
2875 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2876 (__v4si)
2877 _mm_setzero_si128 (),
2878 (__mmask8) __U);
2879}
2880
2881extern __inline __m256i
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2884{
2885 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2886 (__v4di) __W,
2887 (__mmask8) __U);
2888}
2889
2890extern __inline __m256i
2891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2893{
2894 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2895 (__v4di)
2896 _mm256_setzero_si256 (),
2897 (__mmask8) __U);
2898}
2899
2900extern __inline __m128i
2901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2903{
2904 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2905 (__v2di) __W,
2906 (__mmask8) __U);
2907}
2908
2909extern __inline __m128i
2910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2912{
2913 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2914 (__v2di)
2915 _mm_setzero_si128 (),
2916 (__mmask8) __U);
2917}
2918
2919extern __inline __m256i
2920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2921_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2922{
2923 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2924 (__v4di) __W,
2925 (__mmask8) __U);
2926}
2927
2928extern __inline __m256i
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2931{
2932 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2933 (__v4di)
2934 _mm256_setzero_si256 (),
2935 (__mmask8) __U);
2936}
2937
2938extern __inline __m128i
2939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2941{
2942 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2943 (__v2di) __W,
2944 (__mmask8) __U);
2945}
2946
2947extern __inline __m128i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2950{
2951 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2952 (__v2di)
2953 _mm_setzero_si128 (),
2954 (__mmask8) __U);
2955}
2956
2957extern __inline __m256i
2958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2960{
2961 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2962 (__v8si) __W,
2963 (__mmask8) __U);
2964}
2965
2966extern __inline __m256i
2967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2969{
2970 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2971 (__v8si)
2972 _mm256_setzero_si256 (),
2973 (__mmask8) __U);
2974}
2975
2976extern __inline __m128i
2977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2978_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2979{
2980 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2981 (__v4si) __W,
2982 (__mmask8) __U);
2983}
2984
2985extern __inline __m128i
2986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2988{
2989 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2990 (__v4si)
2991 _mm_setzero_si128 (),
2992 (__mmask8) __U);
2993}
2994
2995extern __inline __m256i
2996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2998{
2999 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3000 (__v4di) __W,
3001 (__mmask8) __U);
3002}
3003
3004extern __inline __m256i
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3007{
3008 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3009 (__v4di)
3010 _mm256_setzero_si256 (),
3011 (__mmask8) __U);
3012}
3013
3014extern __inline __m128i
3015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3017{
3018 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3019 (__v2di) __W,
3020 (__mmask8) __U);
3021}
3022
3023extern __inline __m128i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3026{
3027 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3028 (__v2di)
3029 _mm_setzero_si128 (),
3030 (__mmask8) __U);
3031}
3032
3033extern __inline __m256i
3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3036{
3037 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3038 (__v8si) __W,
3039 (__mmask8) __U);
3040}
3041
3042extern __inline __m256i
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3045{
3046 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3047 (__v8si)
3048 _mm256_setzero_si256 (),
3049 (__mmask8) __U);
3050}
3051
3052extern __inline __m128i
3053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3054_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3055{
3056 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3057 (__v4si) __W,
3058 (__mmask8) __U);
3059}
3060
3061extern __inline __m128i
3062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3064{
3065 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3066 (__v4si)
3067 _mm_setzero_si128 (),
3068 (__mmask8) __U);
3069}
3070
3071extern __inline __m256i
3072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3074{
3075 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3076 (__v4di) __W,
3077 (__mmask8) __U);
3078}
3079
3080extern __inline __m256i
3081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3083{
3084 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3085 (__v4di)
3086 _mm256_setzero_si256 (),
3087 (__mmask8) __U);
3088}
3089
3090extern __inline __m128i
3091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3093{
3094 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3095 (__v2di) __W,
3096 (__mmask8) __U);
3097}
3098
3099extern __inline __m128i
3100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3102{
3103 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3104 (__v2di)
3105 _mm_setzero_si128 (),
3106 (__mmask8) __U);
3107}
3108
3109extern __inline __m256i
3110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3112{
3113 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3114 (__v4di) __W,
3115 (__mmask8) __U);
3116}
3117
3118extern __inline __m256i
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3121{
3122 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3123 (__v4di)
3124 _mm256_setzero_si256 (),
3125 (__mmask8) __U);
3126}
3127
3128extern __inline __m128i
3129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3131{
3132 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3133 (__v2di) __W,
3134 (__mmask8) __U);
3135}
3136
3137extern __inline __m128i
3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3140{
3141 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3142 (__v2di)
3143 _mm_setzero_si128 (),
3144 (__mmask8) __U);
3145}
3146
3147extern __inline __m256d
3148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149_mm256_rcp14_pd (__m256d __A)
3150{
3151 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3152 (__v4df)
3153 _mm256_setzero_pd (),
3154 (__mmask8) -1);
3155}
3156
3157extern __inline __m256d
3158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3160{
3161 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3162 (__v4df) __W,
3163 (__mmask8) __U);
3164}
3165
3166extern __inline __m256d
3167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3169{
3170 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3171 (__v4df)
3172 _mm256_setzero_pd (),
3173 (__mmask8) __U);
3174}
3175
3176extern __inline __m128d
3177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178_mm_rcp14_pd (__m128d __A)
3179{
3180 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3181 (__v2df)
3182 _mm_setzero_pd (),
3183 (__mmask8) -1);
3184}
3185
3186extern __inline __m128d
3187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3189{
3190 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3191 (__v2df) __W,
3192 (__mmask8) __U);
3193}
3194
3195extern __inline __m128d
3196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3198{
3199 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3200 (__v2df)
3201 _mm_setzero_pd (),
3202 (__mmask8) __U);
3203}
3204
3205extern __inline __m256
3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207_mm256_rcp14_ps (__m256 __A)
3208{
3209 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3210 (__v8sf)
3211 _mm256_setzero_ps (),
3212 (__mmask8) -1);
3213}
3214
3215extern __inline __m256
3216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3218{
3219 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3220 (__v8sf) __W,
3221 (__mmask8) __U);
3222}
3223
3224extern __inline __m256
3225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3227{
3228 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3229 (__v8sf)
3230 _mm256_setzero_ps (),
3231 (__mmask8) __U);
3232}
3233
3234extern __inline __m128
3235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3236_mm_rcp14_ps (__m128 __A)
3237{
3238 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3239 (__v4sf)
3240 _mm_setzero_ps (),
3241 (__mmask8) -1);
3242}
3243
3244extern __inline __m128
3245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3247{
3248 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3249 (__v4sf) __W,
3250 (__mmask8) __U);
3251}
3252
3253extern __inline __m128
3254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3256{
3257 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3258 (__v4sf)
3259 _mm_setzero_ps (),
3260 (__mmask8) __U);
3261}
3262
3263extern __inline __m256d
3264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265_mm256_rsqrt14_pd (__m256d __A)
3266{
3267 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3268 (__v4df)
3269 _mm256_setzero_pd (),
3270 (__mmask8) -1);
3271}
3272
3273extern __inline __m256d
3274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3276{
3277 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3278 (__v4df) __W,
3279 (__mmask8) __U);
3280}
3281
3282extern __inline __m256d
3283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3284_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3285{
3286 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3287 (__v4df)
3288 _mm256_setzero_pd (),
3289 (__mmask8) __U);
3290}
3291
3292extern __inline __m128d
3293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3294_mm_rsqrt14_pd (__m128d __A)
3295{
3296 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3297 (__v2df)
3298 _mm_setzero_pd (),
3299 (__mmask8) -1);
3300}
3301
3302extern __inline __m128d
3303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3305{
3306 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3307 (__v2df) __W,
3308 (__mmask8) __U);
3309}
3310
3311extern __inline __m128d
3312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3314{
3315 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3316 (__v2df)
3317 _mm_setzero_pd (),
3318 (__mmask8) __U);
3319}
3320
3321extern __inline __m256
3322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323_mm256_rsqrt14_ps (__m256 __A)
3324{
3325 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3326 (__v8sf)
3327 _mm256_setzero_ps (),
3328 (__mmask8) -1);
3329}
3330
3331extern __inline __m256
3332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3334{
3335 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3336 (__v8sf) __W,
3337 (__mmask8) __U);
3338}
3339
3340extern __inline __m256
3341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3343{
3344 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3345 (__v8sf)
3346 _mm256_setzero_ps (),
3347 (__mmask8) __U);
3348}
3349
3350extern __inline __m128
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm_rsqrt14_ps (__m128 __A)
3353{
3354 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3355 (__v4sf)
3356 _mm_setzero_ps (),
3357 (__mmask8) -1);
3358}
3359
3360extern __inline __m128
3361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3363{
3364 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3365 (__v4sf) __W,
3366 (__mmask8) __U);
3367}
3368
3369extern __inline __m128
3370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3372{
3373 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3374 (__v4sf)
3375 _mm_setzero_ps (),
3376 (__mmask8) __U);
3377}
3378
3379extern __inline __m256d
3380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3382{
3383 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3384 (__v4df) __W,
3385 (__mmask8) __U);
3386}
3387
3388extern __inline __m256d
3389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3391{
3392 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3393 (__v4df)
3394 _mm256_setzero_pd (),
3395 (__mmask8) __U);
3396}
3397
3398extern __inline __m128d
3399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3401{
3402 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3403 (__v2df) __W,
3404 (__mmask8) __U);
3405}
3406
3407extern __inline __m128d
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3410{
3411 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3412 (__v2df)
3413 _mm_setzero_pd (),
3414 (__mmask8) __U);
3415}
3416
3417extern __inline __m256
3418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3419_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3420{
3421 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3422 (__v8sf) __W,
3423 (__mmask8) __U);
3424}
3425
3426extern __inline __m256
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3429{
3430 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3431 (__v8sf)
3432 _mm256_setzero_ps (),
3433 (__mmask8) __U);
3434}
3435
3436extern __inline __m128
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3439{
3440 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3441 (__v4sf) __W,
3442 (__mmask8) __U);
3443}
3444
3445extern __inline __m128
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3448{
3449 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3450 (__v4sf)
3451 _mm_setzero_ps (),
3452 (__mmask8) __U);
3453}
3454
3455extern __inline __m256i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3458 __m256i __B)
3459{
3460 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3461 (__v8si) __B,
3462 (__v8si) __W,
3463 (__mmask8) __U);
3464}
3465
3466extern __inline __m256i
3467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3469{
3470 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3471 (__v8si) __B,
3472 (__v8si)
3473 _mm256_setzero_si256 (),
3474 (__mmask8) __U);
3475}
3476
3477extern __inline __m256i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3480 __m256i __B)
3481{
3482 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3483 (__v4di) __B,
3484 (__v4di) __W,
3485 (__mmask8) __U);
3486}
3487
3488extern __inline __m256i
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3491{
3492 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3493 (__v4di) __B,
3494 (__v4di)
3495 _mm256_setzero_si256 (),
3496 (__mmask8) __U);
3497}
3498
3499extern __inline __m256i
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3502 __m256i __B)
3503{
3504 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3505 (__v8si) __B,
3506 (__v8si) __W,
3507 (__mmask8) __U);
3508}
3509
3510extern __inline __m256i
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3513{
3514 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3515 (__v8si) __B,
3516 (__v8si)
3517 _mm256_setzero_si256 (),
3518 (__mmask8) __U);
3519}
3520
3521extern __inline __m256i
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3524 __m256i __B)
3525{
3526 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3527 (__v4di) __B,
3528 (__v4di) __W,
3529 (__mmask8) __U);
3530}
3531
3532extern __inline __m256i
3533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3535{
3536 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3537 (__v4di) __B,
3538 (__v4di)
3539 _mm256_setzero_si256 (),
3540 (__mmask8) __U);
3541}
3542
3543extern __inline __m128i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3546 __m128i __B)
3547{
3548 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3549 (__v4si) __B,
3550 (__v4si) __W,
3551 (__mmask8) __U);
3552}
3553
3554extern __inline __m128i
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3557{
3558 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3559 (__v4si) __B,
3560 (__v4si)
3561 _mm_setzero_si128 (),
3562 (__mmask8) __U);
3563}
3564
3565extern __inline __m128i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3568 __m128i __B)
3569{
3570 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3571 (__v2di) __B,
3572 (__v2di) __W,
3573 (__mmask8) __U);
3574}
3575
3576extern __inline __m128i
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3579{
3580 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3581 (__v2di) __B,
3582 (__v2di)
3583 _mm_setzero_si128 (),
3584 (__mmask8) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3590 __m128i __B)
3591{
3592 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3593 (__v4si) __B,
3594 (__v4si) __W,
3595 (__mmask8) __U);
3596}
3597
3598extern __inline __m128i
3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3601{
3602 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3603 (__v4si) __B,
3604 (__v4si)
3605 _mm_setzero_si128 (),
3606 (__mmask8) __U);
3607}
3608
3609extern __inline __m128i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3612 __m128i __B)
3613{
3614 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3615 (__v2di) __B,
3616 (__v2di) __W,
3617 (__mmask8) __U);
3618}
3619
3620extern __inline __m128i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3623{
3624 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3625 (__v2di) __B,
3626 (__v2di)
3627 _mm_setzero_si128 (),
3628 (__mmask8) __U);
3629}
3630
3631extern __inline __m256
3632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3633_mm256_getexp_ps (__m256 __A)
3634{
3635 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3636 (__v8sf)
3637 _mm256_setzero_ps (),
3638 (__mmask8) -1);
3639}
3640
3641extern __inline __m256
3642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3644{
3645 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3646 (__v8sf) __W,
3647 (__mmask8) __U);
3648}
3649
3650extern __inline __m256
3651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3653{
3654 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3655 (__v8sf)
3656 _mm256_setzero_ps (),
3657 (__mmask8) __U);
3658}
3659
3660extern __inline __m256d
3661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm256_getexp_pd (__m256d __A)
3663{
3664 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3665 (__v4df)
3666 _mm256_setzero_pd (),
3667 (__mmask8) -1);
3668}
3669
3670extern __inline __m256d
3671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3673{
3674 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3675 (__v4df) __W,
3676 (__mmask8) __U);
3677}
3678
3679extern __inline __m256d
3680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3682{
3683 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3684 (__v4df)
3685 _mm256_setzero_pd (),
3686 (__mmask8) __U);
3687}
3688
3689extern __inline __m128
3690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3691_mm_getexp_ps (__m128 __A)
3692{
3693 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3694 (__v4sf)
3695 _mm_setzero_ps (),
3696 (__mmask8) -1);
3697}
3698
3699extern __inline __m128
3700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3702{
3703 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3704 (__v4sf) __W,
3705 (__mmask8) __U);
3706}
3707
3708extern __inline __m128
3709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3711{
3712 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3713 (__v4sf)
3714 _mm_setzero_ps (),
3715 (__mmask8) __U);
3716}
3717
3718extern __inline __m128d
3719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720_mm_getexp_pd (__m128d __A)
3721{
3722 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3723 (__v2df)
3724 _mm_setzero_pd (),
3725 (__mmask8) -1);
3726}
3727
3728extern __inline __m128d
3729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3730_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3731{
3732 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3733 (__v2df) __W,
3734 (__mmask8) __U);
3735}
3736
3737extern __inline __m128d
3738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3740{
3741 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3742 (__v2df)
3743 _mm_setzero_pd (),
3744 (__mmask8) __U);
3745}
3746
3747extern __inline __m256i
3748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3750 __m128i __B)
3751{
3752 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3753 (__v4si) __B,
3754 (__v8si) __W,
3755 (__mmask8) __U);
3756}
3757
3758extern __inline __m256i
3759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3761{
3762 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3763 (__v4si) __B,
3764 (__v8si)
3765 _mm256_setzero_si256 (),
3766 (__mmask8) __U);
3767}
3768
3769extern __inline __m128i
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3772 __m128i __B)
3773{
3774 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3775 (__v4si) __B,
3776 (__v4si) __W,
3777 (__mmask8) __U);
3778}
3779
3780extern __inline __m128i
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3783{
3784 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3785 (__v4si) __B,
3786 (__v4si)
3787 _mm_setzero_si128 (),
3788 (__mmask8) __U);
3789}
3790
3791extern __inline __m256i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3794 __m128i __B)
3795{
3796 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3797 (__v2di) __B,
3798 (__v4di) __W,
3799 (__mmask8) __U);
3800}
3801
3802extern __inline __m256i
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3805{
3806 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3807 (__v2di) __B,
3808 (__v4di)
3809 _mm256_setzero_si256 (),
3810 (__mmask8) __U);
3811}
3812
3813extern __inline __m128i
3814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3816 __m128i __B)
3817{
3818 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3819 (__v2di) __B,
3820 (__v2di) __W,
3821 (__mmask8) __U);
3822}
3823
3824extern __inline __m128i
3825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3827{
3828 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3829 (__v2di) __B,
3830 (__v2di)
a25a7887 3831 _mm_setzero_si128 (),
936c0fe4
AI
3832 (__mmask8) __U);
3833}
3834
3835extern __inline __m256i
3836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3838 __m256i __B)
3839{
3840 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3841 (__v8si) __B,
3842 (__v8si) __W,
3843 (__mmask8) __U);
3844}
3845
3846extern __inline __m256i
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3849{
3850 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3851 (__v8si) __B,
3852 (__v8si)
3853 _mm256_setzero_si256 (),
3854 (__mmask8) __U);
3855}
3856
3857extern __inline __m256d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_scalef_pd (__m256d __A, __m256d __B)
3860{
3861 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3862 (__v4df) __B,
3863 (__v4df)
3864 _mm256_setzero_pd (),
3865 (__mmask8) -1);
3866}
3867
3868extern __inline __m256d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3871 __m256d __B)
3872{
3873 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3874 (__v4df) __B,
3875 (__v4df) __W,
3876 (__mmask8) __U);
3877}
3878
3879extern __inline __m256d
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3882{
3883 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3884 (__v4df) __B,
3885 (__v4df)
3886 _mm256_setzero_pd (),
3887 (__mmask8) __U);
3888}
3889
3890extern __inline __m256
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm256_scalef_ps (__m256 __A, __m256 __B)
3893{
3894 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3895 (__v8sf) __B,
3896 (__v8sf)
3897 _mm256_setzero_ps (),
3898 (__mmask8) -1);
3899}
3900
3901extern __inline __m256
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3904 __m256 __B)
3905{
3906 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3907 (__v8sf) __B,
3908 (__v8sf) __W,
3909 (__mmask8) __U);
3910}
3911
3912extern __inline __m256
3913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3915{
3916 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3917 (__v8sf) __B,
3918 (__v8sf)
3919 _mm256_setzero_ps (),
3920 (__mmask8) __U);
3921}
3922
3923extern __inline __m128d
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm_scalef_pd (__m128d __A, __m128d __B)
3926{
3927 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3928 (__v2df) __B,
3929 (__v2df)
3930 _mm_setzero_pd (),
3931 (__mmask8) -1);
3932}
3933
3934extern __inline __m128d
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3937 __m128d __B)
3938{
3939 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3940 (__v2df) __B,
3941 (__v2df) __W,
3942 (__mmask8) __U);
3943}
3944
3945extern __inline __m128d
3946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3948{
3949 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3950 (__v2df) __B,
3951 (__v2df)
3952 _mm_setzero_pd (),
3953 (__mmask8) __U);
3954}
3955
3956extern __inline __m128
3957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958_mm_scalef_ps (__m128 __A, __m128 __B)
3959{
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf)
3963 _mm_setzero_ps (),
3964 (__mmask8) -1);
3965}
3966
3967extern __inline __m128
3968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3969_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3970{
3971 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3972 (__v4sf) __B,
3973 (__v4sf) __W,
3974 (__mmask8) __U);
3975}
3976
3977extern __inline __m128
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3980{
3981 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3982 (__v4sf) __B,
3983 (__v4sf)
3984 _mm_setzero_ps (),
3985 (__mmask8) __U);
3986}
3987
3988extern __inline __m256d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3991 __m256d __C)
3992{
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3997}
3998
3999extern __inline __m256d
4000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4002 __mmask8 __U)
4003{
4004 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4008}
4009
4010extern __inline __m256d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4013 __m256d __C)
4014{
4015 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4016 (__v4df) __B,
4017 (__v4df) __C,
4018 (__mmask8) __U);
4019}
4020
4021extern __inline __m128d
4022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4024{
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4029}
4030
4031extern __inline __m128d
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4034 __mmask8 __U)
4035{
4036 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4040}
4041
4042extern __inline __m128d
4043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4045 __m128d __C)
4046{
4047 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4048 (__v2df) __B,
4049 (__v2df) __C,
4050 (__mmask8) __U);
4051}
4052
4053extern __inline __m256
4054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4056{
4057 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4061}
4062
4063extern __inline __m256
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4066 __mmask8 __U)
4067{
4068 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4072}
4073
4074extern __inline __m256
4075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4077 __m256 __C)
4078{
4079 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4080 (__v8sf) __B,
4081 (__v8sf) __C,
4082 (__mmask8) __U);
4083}
4084
4085extern __inline __m128
4086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4087_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4088{
4089 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4090 (__v4sf) __B,
4091 (__v4sf) __C,
4092 (__mmask8) __U);
4093}
4094
4095extern __inline __m128
4096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4098{
4099 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4100 (__v4sf) __B,
4101 (__v4sf) __C,
4102 (__mmask8) __U);
4103}
4104
4105extern __inline __m128
4106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4108{
4109 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4110 (__v4sf) __B,
4111 (__v4sf) __C,
4112 (__mmask8) __U);
4113}
4114
4115extern __inline __m256d
4116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4118 __m256d __C)
4119{
fe7f972d 4120 return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
936c0fe4 4121 (__v4df) __B,
fe7f972d 4122 (__v4df) __C,
936c0fe4
AI
4123 (__mmask8) __U);
4124}
4125
4126extern __inline __m256d
4127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4129 __mmask8 __U)
4130{
4131 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4132 (__v4df) __B,
4133 (__v4df) __C,
4134 (__mmask8) __U);
4135}
4136
4137extern __inline __m256d
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4140 __m256d __C)
4141{
fe7f972d 4142 return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
936c0fe4 4143 (__v4df) __B,
fe7f972d 4144 (__v4df) __C,
936c0fe4
AI
4145 (__mmask8) __U);
4146}
4147
4148extern __inline __m128d
4149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4150_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4151{
fe7f972d 4152 return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
936c0fe4 4153 (__v2df) __B,
fe7f972d 4154 (__v2df) __C,
936c0fe4
AI
4155 (__mmask8) __U);
4156}
4157
4158extern __inline __m128d
4159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4161 __mmask8 __U)
4162{
4163 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4164 (__v2df) __B,
4165 (__v2df) __C,
4166 (__mmask8) __U);
4167}
4168
4169extern __inline __m128d
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4172 __m128d __C)
4173{
fe7f972d 4174 return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
936c0fe4 4175 (__v2df) __B,
fe7f972d 4176 (__v2df) __C,
936c0fe4
AI
4177 (__mmask8) __U);
4178}
4179
4180extern __inline __m256
4181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4182_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4183{
fe7f972d 4184 return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
936c0fe4 4185 (__v8sf) __B,
fe7f972d 4186 (__v8sf) __C,
936c0fe4
AI
4187 (__mmask8) __U);
4188}
4189
4190extern __inline __m256
4191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4193 __mmask8 __U)
4194{
4195 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4196 (__v8sf) __B,
4197 (__v8sf) __C,
4198 (__mmask8) __U);
4199}
4200
4201extern __inline __m256
4202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4204 __m256 __C)
4205{
fe7f972d 4206 return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
936c0fe4 4207 (__v8sf) __B,
fe7f972d 4208 (__v8sf) __C,
936c0fe4
AI
4209 (__mmask8) __U);
4210}
4211
4212extern __inline __m128
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4215{
fe7f972d 4216 return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
936c0fe4 4217 (__v4sf) __B,
fe7f972d 4218 (__v4sf) __C,
936c0fe4
AI
4219 (__mmask8) __U);
4220}
4221
4222extern __inline __m128
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4225{
4226 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4227 (__v4sf) __B,
4228 (__v4sf) __C,
4229 (__mmask8) __U);
4230}
4231
4232extern __inline __m128
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4235{
fe7f972d 4236 return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
936c0fe4 4237 (__v4sf) __B,
fe7f972d 4238 (__v4sf) __C,
936c0fe4
AI
4239 (__mmask8) __U);
4240}
4241
4242extern __inline __m256d
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4245 __m256d __C)
4246{
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8) __U);
4251}
4252
4253extern __inline __m256d
4254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4255_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4256 __mmask8 __U)
4257{
4258 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4259 (__v4df) __B,
4260 (__v4df) __C,
4261 (__mmask8)
4262 __U);
4263}
4264
4265extern __inline __m256d
4266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4268 __m256d __C)
4269{
4270 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4271 (__v4df) __B,
4272 (__v4df) __C,
4273 (__mmask8)
4274 __U);
4275}
4276
4277extern __inline __m128d
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4280 __m128d __C)
4281{
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8) __U);
4286}
4287
4288extern __inline __m128d
4289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4290_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4291 __mmask8 __U)
4292{
4293 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4294 (__v2df) __B,
4295 (__v2df) __C,
4296 (__mmask8)
4297 __U);
4298}
4299
4300extern __inline __m128d
4301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4302_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4303 __m128d __C)
4304{
4305 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4306 (__v2df) __B,
4307 (__v2df) __C,
4308 (__mmask8)
4309 __U);
4310}
4311
4312extern __inline __m256
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4315 __m256 __C)
4316{
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4321}
4322
4323extern __inline __m256
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4326 __mmask8 __U)
4327{
4328 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4332}
4333
4334extern __inline __m256
4335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4337 __m256 __C)
4338{
4339 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4340 (__v8sf) __B,
4341 (__v8sf) __C,
4342 (__mmask8) __U);
4343}
4344
4345extern __inline __m128
4346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4347_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4348{
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4353}
4354
4355extern __inline __m128
4356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4358 __mmask8 __U)
4359{
4360 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4364}
4365
4366extern __inline __m128
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4369 __m128 __C)
4370{
4371 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4372 (__v4sf) __B,
4373 (__v4sf) __C,
4374 (__mmask8) __U);
4375}
4376
4377extern __inline __m256d
4378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4380 __m256d __C)
4381{
4382 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4383 (__v4df) __B,
4384 -(__v4df) __C,
4385 (__mmask8) __U);
4386}
4387
4388extern __inline __m256d
4389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4391 __mmask8 __U)
4392{
4393 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4394 (__v4df) __B,
4395 (__v4df) __C,
4396 (__mmask8)
4397 __U);
4398}
4399
4400extern __inline __m256d
4401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4403 __m256d __C)
4404{
4405 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4406 (__v4df) __B,
4407 -(__v4df) __C,
4408 (__mmask8)
4409 __U);
4410}
4411
4412extern __inline __m128d
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4415 __m128d __C)
4416{
4417 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4418 (__v2df) __B,
4419 -(__v2df) __C,
4420 (__mmask8) __U);
4421}
4422
4423extern __inline __m128d
4424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4426 __mmask8 __U)
4427{
4428 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4429 (__v2df) __B,
4430 (__v2df) __C,
4431 (__mmask8)
4432 __U);
4433}
4434
4435extern __inline __m128d
4436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4437_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4438 __m128d __C)
4439{
4440 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4441 (__v2df) __B,
4442 -(__v2df) __C,
4443 (__mmask8)
4444 __U);
4445}
4446
4447extern __inline __m256
4448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4450 __m256 __C)
4451{
4452 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4453 (__v8sf) __B,
4454 -(__v8sf) __C,
4455 (__mmask8) __U);
4456}
4457
4458extern __inline __m256
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4461 __mmask8 __U)
4462{
4463 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4464 (__v8sf) __B,
4465 (__v8sf) __C,
4466 (__mmask8) __U);
4467}
4468
4469extern __inline __m256
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4472 __m256 __C)
4473{
4474 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4475 (__v8sf) __B,
4476 -(__v8sf) __C,
4477 (__mmask8) __U);
4478}
4479
4480extern __inline __m128
4481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4483{
4484 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4485 (__v4sf) __B,
4486 -(__v4sf) __C,
4487 (__mmask8) __U);
4488}
4489
4490extern __inline __m128
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4493 __mmask8 __U)
4494{
4495 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4496 (__v4sf) __B,
4497 (__v4sf) __C,
4498 (__mmask8) __U);
4499}
4500
4501extern __inline __m128
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4504 __m128 __C)
4505{
4506 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4507 (__v4sf) __B,
4508 -(__v4sf) __C,
4509 (__mmask8) __U);
4510}
4511
4512extern __inline __m256d
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4515 __m256d __C)
4516{
4517 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4521}
4522
4523extern __inline __m256d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4526 __mmask8 __U)
4527{
5ca94977
L
4528 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
936c0fe4
AI
4532}
4533
4534extern __inline __m256d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4537 __m256d __C)
4538{
5ca94977
L
4539 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4540 (__v4df) __B,
4541 (__v4df) __C,
4542 (__mmask8) __U);
936c0fe4
AI
4543}
4544
4545extern __inline __m128d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4548 __m128d __C)
4549{
4550 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4554}
4555
4556extern __inline __m128d
4557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4559 __mmask8 __U)
4560{
5ca94977
L
4561 return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
936c0fe4
AI
4565}
4566
4567extern __inline __m128d
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4570 __m128d __C)
4571{
5ca94977
L
4572 return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4573 (__v2df) __B,
4574 (__v2df) __C,
4575 (__mmask8) __U);
936c0fe4
AI
4576}
4577
4578extern __inline __m256
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4581 __m256 __C)
4582{
4583 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4587}
4588
4589extern __inline __m256
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4592 __mmask8 __U)
4593{
5ca94977
L
4594 return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
936c0fe4
AI
4598}
4599
4600extern __inline __m256
4601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4603 __m256 __C)
4604{
5ca94977
L
4605 return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4606 (__v8sf) __B,
4607 (__v8sf) __C,
4608 (__mmask8) __U);
936c0fe4
AI
4609}
4610
4611extern __inline __m128
4612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4613_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4614{
4615 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4616 (__v4sf) __B,
4617 (__v4sf) __C,
4618 (__mmask8) __U);
4619}
4620
4621extern __inline __m128
4622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4624{
5ca94977
L
4625 return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4626 (__v4sf) __B,
4627 (__v4sf) __C,
4628 (__mmask8) __U);
936c0fe4
AI
4629}
4630
4631extern __inline __m128
4632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4634{
5ca94977
L
4635 return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4636 (__v4sf) __B,
4637 (__v4sf) __C,
4638 (__mmask8) __U);
936c0fe4
AI
4639}
4640
4641extern __inline __m256d
4642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4644 __m256d __C)
4645{
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650}
4651
4652extern __inline __m256d
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4655 __mmask8 __U)
4656{
4657 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4658 (__v4df) __B,
4659 (__v4df) __C,
4660 (__mmask8) __U);
4661}
4662
4663extern __inline __m256d
4664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4666 __m256d __C)
4667{
38ef6fb1
L
4668 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4669 (__v4df) __B,
4670 (__v4df) __C,
4671 (__mmask8) __U);
936c0fe4
AI
4672}
4673
4674extern __inline __m128d
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4677 __m128d __C)
4678{
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683}
4684
4685extern __inline __m128d
4686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4688 __mmask8 __U)
4689{
4690 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4691 (__v2df) __B,
4692 (__v2df) __C,
4693 (__mmask8) __U);
4694}
4695
4696extern __inline __m128d
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4699 __m128d __C)
4700{
38ef6fb1
L
4701 return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4702 (__v2df) __B,
4703 (__v2df) __C,
4704 (__mmask8) __U);
936c0fe4
AI
4705}
4706
4707extern __inline __m256
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4710 __m256 __C)
4711{
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716}
4717
4718extern __inline __m256
4719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4721 __mmask8 __U)
4722{
4723 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4724 (__v8sf) __B,
4725 (__v8sf) __C,
4726 (__mmask8) __U);
4727}
4728
4729extern __inline __m256
4730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4732 __m256 __C)
4733{
38ef6fb1
L
4734 return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4735 (__v8sf) __B,
4736 (__v8sf) __C,
4737 (__mmask8) __U);
936c0fe4
AI
4738}
4739
4740extern __inline __m128
4741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4743{
4744 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4745 (__v4sf) __B,
4746 (__v4sf) __C,
4747 (__mmask8) __U);
4748}
4749
4750extern __inline __m128
4751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4753{
4754 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4755 (__v4sf) __B,
4756 (__v4sf) __C,
4757 (__mmask8) __U);
4758}
4759
4760extern __inline __m128
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4763{
38ef6fb1
L
4764 return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4765 (__v4sf) __B,
4766 (__v4sf) __C,
4767 (__mmask8) __U);
936c0fe4
AI
4768}
4769
4770extern __inline __m128i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4773 __m128i __B)
4774{
4775 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4776 (__v4si) __B,
4777 (__v4si) __W,
4778 (__mmask8) __U);
4779}
4780
4781extern __inline __m128i
4782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4784{
4785 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4786 (__v4si) __B,
4787 (__v4si)
4788 _mm_setzero_si128 (),
4789 (__mmask8) __U);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4795 __m256i __B)
4796{
4797 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4798 (__v8si) __B,
4799 (__v8si) __W,
4800 (__mmask8) __U);
4801}
4802
4803extern __inline __m256i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4806{
4807 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4808 (__v8si) __B,
4809 (__v8si)
4810 _mm256_setzero_si256 (),
4811 (__mmask8) __U);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4817 __m128i __B)
4818{
4819 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4820 (__v4si) __B,
4821 (__v4si) __W,
4822 (__mmask8) __U);
4823}
4824
4825extern __inline __m128i
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4828{
4829 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4830 (__v4si) __B,
4831 (__v4si)
4832 _mm_setzero_si128 (),
4833 (__mmask8) __U);
4834}
4835
4836extern __inline __m256i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4839 __m256i __B)
4840{
4841 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4842 (__v8si) __B,
4843 (__v8si) __W,
4844 (__mmask8) __U);
4845}
4846
4847extern __inline __m256i
4848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4850{
4851 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4852 (__v8si) __B,
4853 (__v8si)
4854 _mm256_setzero_si256 (),
4855 (__mmask8) __U);
4856}
4857
01fd9f8d
L
4858extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4859_mm256_or_epi32 (__m256i __A, __m256i __B)
4860{
4861 return (__m256i) ((__v8su)__A | (__v8su)__B);
4862}
4863
936c0fe4
AI
4864extern __inline __m128i
4865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4867{
4868 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4869 (__v4si) __B,
4870 (__v4si) __W,
4871 (__mmask8) __U);
4872}
4873
4874extern __inline __m128i
4875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4876_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4877{
4878 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4879 (__v4si) __B,
4880 (__v4si)
4881 _mm_setzero_si128 (),
4882 (__mmask8) __U);
4883}
4884
01fd9f8d
L
4885extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4886_mm_or_epi32 (__m128i __A, __m128i __B)
4887{
4888 return (__m128i) ((__v4su)__A | (__v4su)__B);
4889}
4890
936c0fe4
AI
4891extern __inline __m256i
4892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4894 __m256i __B)
4895{
4896 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4897 (__v8si) __B,
4898 (__v8si) __W,
4899 (__mmask8) __U);
4900}
4901
4902extern __inline __m256i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4905{
4906 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4907 (__v8si) __B,
4908 (__v8si)
4909 _mm256_setzero_si256 (),
4910 (__mmask8) __U);
4911}
4912
01fd9f8d
L
4913extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914_mm256_xor_epi32 (__m256i __A, __m256i __B)
4915{
4916 return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4917}
4918
936c0fe4
AI
4919extern __inline __m128i
4920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4922 __m128i __B)
4923{
4924 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4925 (__v4si) __B,
4926 (__v4si) __W,
4927 (__mmask8) __U);
4928}
4929
4930extern __inline __m128i
4931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4933{
4934 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4935 (__v4si) __B,
4936 (__v4si)
4937 _mm_setzero_si128 (),
4938 (__mmask8) __U);
4939}
4940
01fd9f8d
L
4941extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942_mm_xor_epi32 (__m128i __A, __m128i __B)
4943{
4944 return (__m128i) ((__v4su)__A ^ (__v4su)__B);
4945}
4946
936c0fe4
AI
4947extern __inline __m128
4948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4950{
4951 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4952 (__v4sf) __W,
4953 (__mmask8) __U);
4954}
4955
4956extern __inline __m128
4957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4959{
4960 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4961 (__v4sf)
4962 _mm_setzero_ps (),
4963 (__mmask8) __U);
4964}
4965
4966extern __inline __m128
4967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4968_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4969{
4970 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4971 (__v4sf) __W,
4972 (__mmask8) __U);
4973}
4974
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4978{
4979 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4980 (__v4sf)
4981 _mm_setzero_ps (),
4982 (__mmask8) __U);
4983}
4984
4985extern __inline __m256i
4986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4987_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4988{
4989 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4990 (__v8si) __W,
4991 (__mmask8) __U);
4992}
4993
4994extern __inline __m256i
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4997{
4998 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4999 (__v8si)
5000 _mm256_setzero_si256 (),
5001 (__mmask8) __U);
5002}
5003
5004extern __inline __m128i
5005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5007{
5008 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5009 (__v4si) __W,
5010 (__mmask8) __U);
5011}
5012
5013extern __inline __m128i
5014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5016{
5017 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5018 (__v4si)
5019 _mm_setzero_si128 (),
5020 (__mmask8) __U);
5021}
5022
5023extern __inline __m256i
5024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025_mm256_cvtps_epu32 (__m256 __A)
5026{
5027 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5028 (__v8si)
5029 _mm256_setzero_si256 (),
5030 (__mmask8) -1);
5031}
5032
5033extern __inline __m256i
5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5036{
5037 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5038 (__v8si) __W,
5039 (__mmask8) __U);
5040}
5041
5042extern __inline __m256i
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5045{
5046 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5047 (__v8si)
5048 _mm256_setzero_si256 (),
5049 (__mmask8) __U);
5050}
5051
5052extern __inline __m128i
5053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054_mm_cvtps_epu32 (__m128 __A)
5055{
5056 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5057 (__v4si)
5058 _mm_setzero_si128 (),
5059 (__mmask8) -1);
5060}
5061
5062extern __inline __m128i
5063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5065{
5066 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5067 (__v4si) __W,
5068 (__mmask8) __U);
5069}
5070
5071extern __inline __m128i
5072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5074{
5075 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5076 (__v4si)
5077 _mm_setzero_si128 (),
5078 (__mmask8) __U);
5079}
5080
5081extern __inline __m256d
5082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5084{
5085 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5086 (__v4df) __W,
5087 (__mmask8) __U);
5088}
5089
5090extern __inline __m256d
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5093{
5094 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5095 (__v4df)
5096 _mm256_setzero_pd (),
5097 (__mmask8) __U);
5098}
5099
5100extern __inline __m128d
5101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5103{
5104 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5105 (__v2df) __W,
5106 (__mmask8) __U);
5107}
5108
5109extern __inline __m128d
5110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5112{
5113 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5114 (__v2df)
5115 _mm_setzero_pd (),
5116 (__mmask8) __U);
5117}
5118
5119extern __inline __m256
5120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5122{
5123 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5124 (__v8sf) __W,
5125 (__mmask8) __U);
5126}
5127
5128extern __inline __m256
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5131{
5132 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5133 (__v8sf)
5134 _mm256_setzero_ps (),
5135 (__mmask8) __U);
5136}
5137
5138extern __inline __m128
5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5141{
5142 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5143 (__v4sf) __W,
5144 (__mmask8) __U);
5145}
5146
5147extern __inline __m128
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5150{
5151 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5152 (__v4sf)
5153 _mm_setzero_ps (),
5154 (__mmask8) __U);
5155}
5156
5157extern __inline __m256
5158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5160{
5161 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5162 (__v8sf) __W,
5163 (__mmask8) __U);
5164}
5165
5166extern __inline __m256
5167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5169{
5170 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5171 (__v8sf)
5172 _mm256_setzero_ps (),
5173 (__mmask8) __U);
5174}
5175
5176extern __inline __m128
5177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5178_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5179{
5180 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5181 (__v4sf) __W,
5182 (__mmask8) __U);
5183}
5184
5185extern __inline __m128
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5188{
5189 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5190 (__v4sf)
5191 _mm_setzero_ps (),
5192 (__mmask8) __U);
5193}
5194
5195extern __inline __m128i
5196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5198 __m128i __B)
5199{
5200 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5201 (__v4si) __B,
5202 (__v4si) __W,
5203 (__mmask8) __U);
5204}
5205
5206extern __inline __m128i
5207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5209{
5210 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5211 (__v4si) __B,
5212 (__v4si)
5213 _mm_setzero_si128 (),
5214 (__mmask8) __U);
5215}
5216
5217extern __inline __m256i
5218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5219_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5220 __m256i __B)
5221{
5222 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5223 (__v8si) __B,
5224 (__v8si) __W,
5225 (__mmask8) __U);
5226}
5227
5228extern __inline __m256i
5229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5230_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5231{
5232 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5233 (__v8si) __B,
5234 (__v8si)
5235 _mm256_setzero_si256 (),
5236 (__mmask8) __U);
5237}
5238
5239extern __inline __m128i
5240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5241_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5242 __m128i __B)
5243{
5244 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5245 (__v2di) __B,
5246 (__v2di) __W,
5247 (__mmask8) __U);
5248}
5249
5250extern __inline __m128i
5251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5253{
5254 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5255 (__v2di) __B,
5256 (__v2di)
a25a7887 5257 _mm_setzero_si128 (),
936c0fe4
AI
5258 (__mmask8) __U);
5259}
5260
5261extern __inline __m256i
5262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5264 __m256i __B)
5265{
5266 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5267 (__v4di) __B,
5268 (__v4di) __W,
5269 (__mmask8) __U);
5270}
5271
5272extern __inline __m256i
5273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5275{
5276 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5277 (__v4di) __B,
5278 (__v4di)
5279 _mm256_setzero_si256 (),
5280 (__mmask8) __U);
5281}
5282
5283extern __inline __m128i
5284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5286 __m128i __B)
5287{
5288 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5289 (__v4si) __B,
5290 (__v4si) __W,
5291 (__mmask8) __U);
5292}
5293
5294extern __inline __m128i
5295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5297{
5298 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5299 (__v4si) __B,
5300 (__v4si)
5301 _mm_setzero_si128 (),
5302 (__mmask8) __U);
5303}
5304
5305extern __inline __m256i
5306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5307_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5308 __m256i __B)
5309{
5310 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5311 (__v8si) __B,
5312 (__v8si) __W,
5313 (__mmask8) __U);
5314}
5315
5316extern __inline __m256i
5317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5318_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5319{
5320 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5321 (__v8si) __B,
5322 (__v8si)
5323 _mm256_setzero_si256 (),
5324 (__mmask8) __U);
5325}
5326
5327extern __inline __m128i
5328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5329_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5330 __m128i __B)
5331{
5332 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5333 (__v2di) __B,
5334 (__v2di) __W,
5335 (__mmask8) __U);
5336}
5337
5338extern __inline __m128i
5339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5341{
5342 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5343 (__v2di) __B,
5344 (__v2di)
a25a7887 5345 _mm_setzero_si128 (),
936c0fe4
AI
5346 (__mmask8) __U);
5347}
5348
5349extern __inline __m256i
5350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5351_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5352 __m256i __B)
5353{
5354 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5355 (__v4di) __B,
5356 (__v4di) __W,
5357 (__mmask8) __U);
5358}
5359
5360extern __inline __m256i
5361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5363{
5364 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5365 (__v4di) __B,
5366 (__v4di)
5367 _mm256_setzero_si256 (),
5368 (__mmask8) __U);
5369}
5370
eee5d6f5
AI
5371extern __inline __mmask8
5372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5374{
5375 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5376 (__v4si) __B, 0,
5377 (__mmask8) -1);
5378}
5379
936c0fe4
AI
5380extern __inline __mmask8
5381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5383{
5384 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5385 (__v4si) __B,
5386 (__mmask8) -1);
5387}
5388
eee5d6f5
AI
5389extern __inline __mmask8
5390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5391_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5392{
5393 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5394 (__v4si) __B, 0, __U);
5395}
5396
936c0fe4
AI
5397extern __inline __mmask8
5398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5399_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5400{
5401 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5402 (__v4si) __B, __U);
5403}
5404
eee5d6f5
AI
5405extern __inline __mmask8
5406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5407_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5408{
5409 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5410 (__v8si) __B, 0,
5411 (__mmask8) -1);
5412}
5413
936c0fe4
AI
5414extern __inline __mmask8
5415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5417{
5418 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5419 (__v8si) __B,
5420 (__mmask8) -1);
5421}
5422
eee5d6f5
AI
5423extern __inline __mmask8
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5426{
5427 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5428 (__v8si) __B, 0, __U);
5429}
5430
936c0fe4
AI
5431extern __inline __mmask8
5432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5434{
5435 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5436 (__v8si) __B, __U);
5437}
5438
eee5d6f5
AI
5439extern __inline __mmask8
5440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5441_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5442{
5443 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5444 (__v2di) __B, 0,
5445 (__mmask8) -1);
5446}
5447
936c0fe4
AI
5448extern __inline __mmask8
5449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5450_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5451{
5452 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5453 (__v2di) __B,
5454 (__mmask8) -1);
5455}
5456
eee5d6f5
AI
5457extern __inline __mmask8
5458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5459_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5460{
5461 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5462 (__v2di) __B, 0, __U);
5463}
5464
936c0fe4
AI
5465extern __inline __mmask8
5466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5467_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5468{
5469 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5470 (__v2di) __B, __U);
5471}
5472
eee5d6f5
AI
5473extern __inline __mmask8
5474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5475_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5476{
5477 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5478 (__v4di) __B, 0,
5479 (__mmask8) -1);
5480}
5481
936c0fe4
AI
5482extern __inline __mmask8
5483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5484_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5485{
5486 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5487 (__v4di) __B,
5488 (__mmask8) -1);
5489}
5490
eee5d6f5
AI
5491extern __inline __mmask8
5492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5493_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5494{
5495 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5496 (__v4di) __B, 0, __U);
5497}
5498
936c0fe4
AI
5499extern __inline __mmask8
5500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5501_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5502{
5503 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5504 (__v4di) __B, __U);
5505}
5506
eee5d6f5
AI
5507extern __inline __mmask8
5508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5510{
5511 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5512 (__v4si) __B, 6,
5513 (__mmask8) -1);
5514}
5515
936c0fe4
AI
5516extern __inline __mmask8
5517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5518_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5519{
5520 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5521 (__v4si) __B,
5522 (__mmask8) -1);
5523}
5524
eee5d6f5
AI
5525extern __inline __mmask8
5526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5527_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5528{
5529 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5530 (__v4si) __B, 6, __U);
5531}
5532
936c0fe4
AI
5533extern __inline __mmask8
5534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5535_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5536{
5537 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5538 (__v4si) __B, __U);
5539}
5540
eee5d6f5
AI
5541extern __inline __mmask8
5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5544{
5545 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5546 (__v8si) __B, 6,
5547 (__mmask8) -1);
5548}
5549
936c0fe4
AI
5550extern __inline __mmask8
5551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5553{
5554 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5555 (__v8si) __B,
5556 (__mmask8) -1);
5557}
5558
eee5d6f5
AI
5559extern __inline __mmask8
5560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5562{
5563 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5564 (__v8si) __B, 6, __U);
5565}
5566
936c0fe4
AI
5567extern __inline __mmask8
5568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5570{
5571 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5572 (__v8si) __B, __U);
5573}
5574
eee5d6f5
AI
5575extern __inline __mmask8
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5578{
5579 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5580 (__v2di) __B, 6,
5581 (__mmask8) -1);
5582}
5583
936c0fe4
AI
5584extern __inline __mmask8
5585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5587{
5588 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5589 (__v2di) __B,
5590 (__mmask8) -1);
5591}
5592
eee5d6f5
AI
5593extern __inline __mmask8
5594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5595_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5596{
5597 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5598 (__v2di) __B, 6, __U);
5599}
5600
936c0fe4
AI
5601extern __inline __mmask8
5602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5604{
5605 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5606 (__v2di) __B, __U);
5607}
5608
eee5d6f5
AI
5609extern __inline __mmask8
5610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5611_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5612{
5613 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5614 (__v4di) __B, 6,
5615 (__mmask8) -1);
5616}
5617
936c0fe4
AI
5618extern __inline __mmask8
5619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5621{
5622 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5623 (__v4di) __B,
5624 (__mmask8) -1);
5625}
5626
eee5d6f5
AI
5627extern __inline __mmask8
5628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5630{
5631 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5632 (__v4di) __B, 6, __U);
5633}
5634
936c0fe4
AI
5635extern __inline __mmask8
5636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5638{
5639 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5640 (__v4di) __B, __U);
5641}
5642
5643extern __inline __mmask8
5644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5645_mm_test_epi32_mask (__m128i __A, __m128i __B)
5646{
5647 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5648 (__v4si) __B,
5649 (__mmask8) -1);
5650}
5651
5652extern __inline __mmask8
5653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5654_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5655{
5656 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5657 (__v4si) __B, __U);
5658}
5659
5660extern __inline __mmask8
5661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5662_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5663{
5664 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5665 (__v8si) __B,
5666 (__mmask8) -1);
5667}
5668
5669extern __inline __mmask8
5670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5672{
5673 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5674 (__v8si) __B, __U);
5675}
5676
5677extern __inline __mmask8
5678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679_mm_test_epi64_mask (__m128i __A, __m128i __B)
5680{
5681 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5682 (__v2di) __B,
5683 (__mmask8) -1);
5684}
5685
5686extern __inline __mmask8
5687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5689{
5690 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5691 (__v2di) __B, __U);
5692}
5693
5694extern __inline __mmask8
5695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5697{
5698 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5699 (__v4di) __B,
5700 (__mmask8) -1);
5701}
5702
5703extern __inline __mmask8
5704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5706{
5707 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5708 (__v4di) __B, __U);
5709}
5710
5711extern __inline __mmask8
5712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5714{
5715 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5716 (__v4si) __B,
5717 (__mmask8) -1);
5718}
5719
5720extern __inline __mmask8
5721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5723{
5724 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5725 (__v4si) __B, __U);
5726}
5727
5728extern __inline __mmask8
5729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5731{
5732 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5733 (__v8si) __B,
5734 (__mmask8) -1);
5735}
5736
5737extern __inline __mmask8
5738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5740{
5741 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5742 (__v8si) __B, __U);
5743}
5744
5745extern __inline __mmask8
5746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5748{
5749 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5750 (__v2di) __B,
5751 (__mmask8) -1);
5752}
5753
5754extern __inline __mmask8
5755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5756_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5757{
5758 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5759 (__v2di) __B, __U);
5760}
5761
5762extern __inline __mmask8
5763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5764_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5765{
5766 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5767 (__v4di) __B,
5768 (__mmask8) -1);
5769}
5770
5771extern __inline __mmask8
5772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5774{
5775 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5776 (__v4di) __B, __U);
5777}
5778
5779extern __inline __m256d
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5782{
5783 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5784 (__v4df) __W,
5785 (__mmask8) __U);
5786}
5787
5788extern __inline __m256d
5789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5791{
5792 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5793 (__v4df)
5794 _mm256_setzero_pd (),
5795 (__mmask8) __U);
5796}
5797
5798extern __inline void
5799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5801{
5802 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5803 (__v4df) __A,
5804 (__mmask8) __U);
5805}
5806
5807extern __inline __m128d
5808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5810{
5811 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5812 (__v2df) __W,
5813 (__mmask8) __U);
5814}
5815
5816extern __inline __m128d
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5819{
5820 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5821 (__v2df)
5822 _mm_setzero_pd (),
5823 (__mmask8) __U);
5824}
5825
5826extern __inline void
5827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5829{
5830 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5831 (__v2df) __A,
5832 (__mmask8) __U);
5833}
5834
5835extern __inline __m256
5836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5838{
5839 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5840 (__v8sf) __W,
5841 (__mmask8) __U);
5842}
5843
5844extern __inline __m256
5845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5847{
5848 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5849 (__v8sf)
5850 _mm256_setzero_ps (),
5851 (__mmask8) __U);
5852}
5853
5854extern __inline void
5855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5857{
5858 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5859 (__v8sf) __A,
5860 (__mmask8) __U);
5861}
5862
5863extern __inline __m128
5864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5866{
5867 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5868 (__v4sf) __W,
5869 (__mmask8) __U);
5870}
5871
5872extern __inline __m128
5873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5875{
5876 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5877 (__v4sf)
5878 _mm_setzero_ps (),
5879 (__mmask8) __U);
5880}
5881
5882extern __inline void
5883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5885{
5886 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5887 (__v4sf) __A,
5888 (__mmask8) __U);
5889}
5890
5891extern __inline __m256i
5892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5894{
5895 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5896 (__v4di) __W,
5897 (__mmask8) __U);
5898}
5899
5900extern __inline __m256i
5901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5903{
5904 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5905 (__v4di)
5906 _mm256_setzero_si256 (),
5907 (__mmask8) __U);
5908}
5909
5910extern __inline void
5911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5913{
5914 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5915 (__v4di) __A,
5916 (__mmask8) __U);
5917}
5918
5919extern __inline __m128i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5922{
5923 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5924 (__v2di) __W,
5925 (__mmask8) __U);
5926}
5927
5928extern __inline __m128i
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5931{
5932 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5933 (__v2di)
a25a7887 5934 _mm_setzero_si128 (),
936c0fe4
AI
5935 (__mmask8) __U);
5936}
5937
5938extern __inline void
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5941{
5942 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5943 (__v2di) __A,
5944 (__mmask8) __U);
5945}
5946
5947extern __inline __m256i
5948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5950{
5951 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5952 (__v8si) __W,
5953 (__mmask8) __U);
5954}
5955
5956extern __inline __m256i
5957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5959{
5960 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5961 (__v8si)
5962 _mm256_setzero_si256 (),
5963 (__mmask8) __U);
5964}
5965
5966extern __inline void
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5969{
5970 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5971 (__v8si) __A,
5972 (__mmask8) __U);
5973}
5974
5975extern __inline __m128i
5976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5978{
5979 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5980 (__v4si) __W,
5981 (__mmask8) __U);
5982}
5983
5984extern __inline __m128i
5985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5987{
5988 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5989 (__v4si)
5990 _mm_setzero_si128 (),
5991 (__mmask8) __U);
5992}
5993
5994extern __inline void
5995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5997{
5998 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5999 (__v4si) __A,
6000 (__mmask8) __U);
6001}
6002
6003extern __inline __m256d
6004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6006{
6007 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6008 (__v4df) __W,
6009 (__mmask8) __U);
6010}
6011
6012extern __inline __m256d
6013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6015{
6016 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6017 (__v4df)
6018 _mm256_setzero_pd (),
6019 (__mmask8) __U);
6020}
6021
6022extern __inline __m256d
6023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6025{
6026 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6027 (__v4df) __W,
6028 (__mmask8)
6029 __U);
6030}
6031
6032extern __inline __m256d
6033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6034_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6035{
6036 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6037 (__v4df)
6038 _mm256_setzero_pd (),
6039 (__mmask8)
6040 __U);
6041}
6042
6043extern __inline __m128d
6044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6045_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6046{
6047 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6048 (__v2df) __W,
6049 (__mmask8) __U);
6050}
6051
6052extern __inline __m128d
6053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6054_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6055{
6056 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6057 (__v2df)
6058 _mm_setzero_pd (),
6059 (__mmask8) __U);
6060}
6061
6062extern __inline __m128d
6063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6064_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6065{
6066 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6067 (__v2df) __W,
6068 (__mmask8)
6069 __U);
6070}
6071
6072extern __inline __m128d
6073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6075{
6076 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6077 (__v2df)
6078 _mm_setzero_pd (),
6079 (__mmask8)
6080 __U);
6081}
6082
6083extern __inline __m256
6084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6086{
6087 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6088 (__v8sf) __W,
6089 (__mmask8) __U);
6090}
6091
6092extern __inline __m256
6093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6094_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6095{
6096 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6097 (__v8sf)
6098 _mm256_setzero_ps (),
6099 (__mmask8) __U);
6100}
6101
6102extern __inline __m256
6103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6104_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6105{
6106 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6107 (__v8sf) __W,
6108 (__mmask8) __U);
6109}
6110
6111extern __inline __m256
6112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6114{
6115 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6116 (__v8sf)
6117 _mm256_setzero_ps (),
6118 (__mmask8)
6119 __U);
6120}
6121
6122extern __inline __m128
6123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6125{
6126 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6127 (__v4sf) __W,
6128 (__mmask8) __U);
6129}
6130
6131extern __inline __m128
6132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6134{
6135 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6136 (__v4sf)
6137 _mm_setzero_ps (),
6138 (__mmask8) __U);
6139}
6140
6141extern __inline __m128
6142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6143_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6144{
6145 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6146 (__v4sf) __W,
6147 (__mmask8) __U);
6148}
6149
6150extern __inline __m128
6151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6153{
6154 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6155 (__v4sf)
6156 _mm_setzero_ps (),
6157 (__mmask8)
6158 __U);
6159}
6160
6161extern __inline __m256i
6162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6163_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6164{
6165 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6166 (__v4di) __W,
6167 (__mmask8) __U);
6168}
6169
6170extern __inline __m256i
6171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6173{
6174 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6175 (__v4di)
6176 _mm256_setzero_si256 (),
6177 (__mmask8) __U);
6178}
6179
6180extern __inline __m256i
6181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6183 void const *__P)
6184{
6185 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6186 (__v4di) __W,
6187 (__mmask8)
6188 __U);
6189}
6190
6191extern __inline __m256i
6192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6193_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6194{
6195 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6196 (__v4di)
6197 _mm256_setzero_si256 (),
6198 (__mmask8)
6199 __U);
6200}
6201
6202extern __inline __m128i
6203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6205{
6206 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6207 (__v2di) __W,
6208 (__mmask8) __U);
6209}
6210
6211extern __inline __m128i
6212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6214{
6215 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6216 (__v2di)
6217 _mm_setzero_si128 (),
6218 (__mmask8) __U);
6219}
6220
6221extern __inline __m128i
6222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6223_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6224{
6225 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6226 (__v2di) __W,
6227 (__mmask8)
6228 __U);
6229}
6230
6231extern __inline __m128i
6232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6234{
6235 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6236 (__v2di)
6237 _mm_setzero_si128 (),
6238 (__mmask8)
6239 __U);
6240}
6241
6242extern __inline __m256i
6243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6245{
6246 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6247 (__v8si) __W,
6248 (__mmask8) __U);
6249}
6250
6251extern __inline __m256i
6252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6254{
6255 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6256 (__v8si)
6257 _mm256_setzero_si256 (),
6258 (__mmask8) __U);
6259}
6260
6261extern __inline __m256i
6262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6264 void const *__P)
6265{
6266 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6267 (__v8si) __W,
6268 (__mmask8)
6269 __U);
6270}
6271
6272extern __inline __m256i
6273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6275{
6276 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6277 (__v8si)
6278 _mm256_setzero_si256 (),
6279 (__mmask8)
6280 __U);
6281}
6282
6283extern __inline __m128i
6284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6286{
6287 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6288 (__v4si) __W,
6289 (__mmask8) __U);
6290}
6291
6292extern __inline __m128i
6293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6295{
6296 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6297 (__v4si)
6298 _mm_setzero_si128 (),
6299 (__mmask8) __U);
6300}
6301
6302extern __inline __m128i
6303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6305{
6306 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6307 (__v4si) __W,
6308 (__mmask8)
6309 __U);
6310}
6311
6312extern __inline __m128i
6313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6315{
6316 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6317 (__v4si)
6318 _mm_setzero_si128 (),
6319 (__mmask8)
6320 __U);
6321}
6322
6323extern __inline __m256d
6324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6326{
6327 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6328 /* idx */ ,
6329 (__v4df) __A,
6330 (__v4df) __B,
c42b0bdf 6331 (__mmask8) -1);
936c0fe4
AI
6332}
6333
6334extern __inline __m256d
6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6337 __m256d __B)
6338{
6339 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6340 /* idx */ ,
6341 (__v4df) __A,
6342 (__v4df) __B,
6343 (__mmask8)
6344 __U);
6345}
6346
6347extern __inline __m256d
6348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6350 __m256d __B)
6351{
6352 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6353 (__v4di) __I
6354 /* idx */ ,
6355 (__v4df) __B,
6356 (__mmask8)
6357 __U);
6358}
6359
6360extern __inline __m256d
6361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6363 __m256d __B)
6364{
6365 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6366 /* idx */ ,
6367 (__v4df) __A,
6368 (__v4df) __B,
6369 (__mmask8)
6370 __U);
6371}
6372
6373extern __inline __m256
6374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6376{
6377 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6378 /* idx */ ,
6379 (__v8sf) __A,
6380 (__v8sf) __B,
6381 (__mmask8) -1);
6382}
6383
6384extern __inline __m256
6385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6387 __m256 __B)
6388{
6389 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6390 /* idx */ ,
6391 (__v8sf) __A,
6392 (__v8sf) __B,
6393 (__mmask8) __U);
6394}
6395
6396extern __inline __m256
6397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6399 __m256 __B)
6400{
6401 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6402 (__v8si) __I
6403 /* idx */ ,
6404 (__v8sf) __B,
6405 (__mmask8) __U);
6406}
6407
6408extern __inline __m256
6409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6411 __m256 __B)
6412{
6413 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6414 /* idx */ ,
6415 (__v8sf) __A,
6416 (__v8sf) __B,
6417 (__mmask8)
6418 __U);
6419}
6420
6421extern __inline __m128i
6422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6424{
6425 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6426 /* idx */ ,
6427 (__v2di) __A,
6428 (__v2di) __B,
6429 (__mmask8) -1);
6430}
6431
6432extern __inline __m128i
6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6435 __m128i __B)
6436{
6437 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6438 /* idx */ ,
6439 (__v2di) __A,
6440 (__v2di) __B,
6441 (__mmask8) __U);
6442}
6443
6444extern __inline __m128i
6445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6447 __m128i __B)
6448{
6449 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6450 (__v2di) __I
6451 /* idx */ ,
6452 (__v2di) __B,
6453 (__mmask8) __U);
6454}
6455
6456extern __inline __m128i
6457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6459 __m128i __B)
6460{
6461 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6462 /* idx */ ,
6463 (__v2di) __A,
6464 (__v2di) __B,
6465 (__mmask8)
6466 __U);
6467}
6468
6469extern __inline __m128i
6470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6471_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6472{
6473 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6474 /* idx */ ,
6475 (__v4si) __A,
6476 (__v4si) __B,
6477 (__mmask8) -1);
6478}
6479
6480extern __inline __m128i
6481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6483 __m128i __B)
6484{
6485 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6486 /* idx */ ,
6487 (__v4si) __A,
6488 (__v4si) __B,
6489 (__mmask8) __U);
6490}
6491
6492extern __inline __m128i
6493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6495 __m128i __B)
6496{
6497 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6498 (__v4si) __I
6499 /* idx */ ,
6500 (__v4si) __B,
6501 (__mmask8) __U);
6502}
6503
6504extern __inline __m128i
6505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6507 __m128i __B)
6508{
6509 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6510 /* idx */ ,
6511 (__v4si) __A,
6512 (__v4si) __B,
6513 (__mmask8)
6514 __U);
6515}
6516
6517extern __inline __m256i
6518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6519_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6520{
6521 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6522 /* idx */ ,
6523 (__v4di) __A,
6524 (__v4di) __B,
6525 (__mmask8) -1);
6526}
6527
6528extern __inline __m256i
6529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6531 __m256i __B)
6532{
6533 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6534 /* idx */ ,
6535 (__v4di) __A,
6536 (__v4di) __B,
6537 (__mmask8) __U);
6538}
6539
6540extern __inline __m256i
6541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6543 __mmask8 __U, __m256i __B)
6544{
6545 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6546 (__v4di) __I
6547 /* idx */ ,
6548 (__v4di) __B,
6549 (__mmask8) __U);
6550}
6551
6552extern __inline __m256i
6553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6555 __m256i __I, __m256i __B)
6556{
6557 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6558 /* idx */ ,
6559 (__v4di) __A,
6560 (__v4di) __B,
6561 (__mmask8)
6562 __U);
6563}
6564
6565extern __inline __m256i
6566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6568{
6569 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6570 /* idx */ ,
6571 (__v8si) __A,
6572 (__v8si) __B,
6573 (__mmask8) -1);
6574}
6575
6576extern __inline __m256i
6577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6579 __m256i __B)
6580{
6581 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6582 /* idx */ ,
6583 (__v8si) __A,
6584 (__v8si) __B,
6585 (__mmask8) __U);
6586}
6587
6588extern __inline __m256i
6589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6590_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6591 __mmask8 __U, __m256i __B)
6592{
6593 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6594 (__v8si) __I
6595 /* idx */ ,
6596 (__v8si) __B,
6597 (__mmask8) __U);
6598}
6599
6600extern __inline __m256i
6601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6603 __m256i __I, __m256i __B)
6604{
6605 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6606 /* idx */ ,
6607 (__v8si) __A,
6608 (__v8si) __B,
6609 (__mmask8)
6610 __U);
6611}
6612
6613extern __inline __m128d
6614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6616{
6617 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6618 /* idx */ ,
6619 (__v2df) __A,
6620 (__v2df) __B,
c42b0bdf 6621 (__mmask8) -1);
936c0fe4
AI
6622}
6623
6624extern __inline __m128d
6625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6627 __m128d __B)
6628{
6629 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6630 /* idx */ ,
6631 (__v2df) __A,
6632 (__v2df) __B,
6633 (__mmask8)
6634 __U);
6635}
6636
6637extern __inline __m128d
6638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6640 __m128d __B)
6641{
6642 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6643 (__v2di) __I
6644 /* idx */ ,
6645 (__v2df) __B,
6646 (__mmask8)
6647 __U);
6648}
6649
6650extern __inline __m128d
6651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6653 __m128d __B)
6654{
6655 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6656 /* idx */ ,
6657 (__v2df) __A,
6658 (__v2df) __B,
6659 (__mmask8)
6660 __U);
6661}
6662
6663extern __inline __m128
6664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6665_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6666{
6667 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6668 /* idx */ ,
6669 (__v4sf) __A,
6670 (__v4sf) __B,
6671 (__mmask8) -1);
6672}
6673
6674extern __inline __m128
6675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6677 __m128 __B)
6678{
6679 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6680 /* idx */ ,
6681 (__v4sf) __A,
6682 (__v4sf) __B,
6683 (__mmask8) __U);
6684}
6685
6686extern __inline __m128
6687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6689 __m128 __B)
6690{
6691 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6692 (__v4si) __I
6693 /* idx */ ,
6694 (__v4sf) __B,
6695 (__mmask8) __U);
6696}
6697
6698extern __inline __m128
6699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6700_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6701 __m128 __B)
6702{
6703 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6704 /* idx */ ,
6705 (__v4sf) __A,
6706 (__v4sf) __B,
6707 (__mmask8)
6708 __U);
6709}
6710
6711extern __inline __m128i
6712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713_mm_srav_epi64 (__m128i __X, __m128i __Y)
6714{
6715 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6716 (__v2di) __Y,
6717 (__v2di)
a25a7887 6718 _mm_setzero_si128 (),
936c0fe4
AI
6719 (__mmask8) -1);
6720}
6721
6722extern __inline __m128i
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6725 __m128i __Y)
6726{
6727 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6728 (__v2di) __Y,
6729 (__v2di) __W,
6730 (__mmask8) __U);
6731}
6732
6733extern __inline __m128i
6734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6736{
6737 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6738 (__v2di) __Y,
6739 (__v2di)
a25a7887 6740 _mm_setzero_si128 (),
936c0fe4
AI
6741 (__mmask8) __U);
6742}
6743
6744extern __inline __m256i
6745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6747 __m256i __Y)
6748{
6749 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6750 (__v8si) __Y,
6751 (__v8si) __W,
6752 (__mmask8) __U);
6753}
6754
6755extern __inline __m256i
6756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6758{
6759 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6760 (__v8si) __Y,
6761 (__v8si)
6762 _mm256_setzero_si256 (),
6763 (__mmask8) __U);
6764}
6765
6766extern __inline __m128i
6767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6769 __m128i __Y)
6770{
6771 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6772 (__v4si) __Y,
6773 (__v4si) __W,
6774 (__mmask8) __U);
6775}
6776
6777extern __inline __m128i
6778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6780{
6781 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6782 (__v4si) __Y,
6783 (__v4si)
6784 _mm_setzero_si128 (),
6785 (__mmask8) __U);
6786}
6787
6788extern __inline __m256i
6789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6791 __m256i __Y)
6792{
6793 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6794 (__v4di) __Y,
6795 (__v4di) __W,
6796 (__mmask8) __U);
6797}
6798
6799extern __inline __m256i
6800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6802{
6803 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6804 (__v4di) __Y,
6805 (__v4di)
6806 _mm256_setzero_si256 (),
6807 (__mmask8) __U);
6808}
6809
6810extern __inline __m128i
6811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6813 __m128i __Y)
6814{
6815 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6816 (__v2di) __Y,
6817 (__v2di) __W,
6818 (__mmask8) __U);
6819}
6820
6821extern __inline __m128i
6822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6824{
6825 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6826 (__v2di) __Y,
6827 (__v2di)
a25a7887 6828 _mm_setzero_si128 (),
936c0fe4
AI
6829 (__mmask8) __U);
6830}
6831
6832extern __inline __m256i
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6835 __m256i __Y)
6836{
6837 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6838 (__v8si) __Y,
6839 (__v8si) __W,
6840 (__mmask8) __U);
6841}
6842
6843extern __inline __m256i
6844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6846{
6847 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6848 (__v8si) __Y,
6849 (__v8si)
6850 _mm256_setzero_si256 (),
6851 (__mmask8) __U);
6852}
6853
6854extern __inline __m128i
6855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6857 __m128i __Y)
6858{
6859 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6860 (__v4si) __Y,
6861 (__v4si) __W,
6862 (__mmask8) __U);
6863}
6864
6865extern __inline __m128i
6866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6868{
6869 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6870 (__v4si) __Y,
6871 (__v4si)
6872 _mm_setzero_si128 (),
6873 (__mmask8) __U);
6874}
6875
6876extern __inline __m256i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6879 __m256i __Y)
6880{
6881 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6882 (__v8si) __Y,
6883 (__v8si) __W,
6884 (__mmask8) __U);
6885}
6886
6887extern __inline __m256i
6888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6890{
6891 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6892 (__v8si) __Y,
6893 (__v8si)
6894 _mm256_setzero_si256 (),
6895 (__mmask8) __U);
6896}
6897
6898extern __inline __m128i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6901 __m128i __Y)
6902{
6903 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6904 (__v4si) __Y,
6905 (__v4si) __W,
6906 (__mmask8) __U);
6907}
6908
6909extern __inline __m128i
6910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6912{
6913 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6914 (__v4si) __Y,
6915 (__v4si)
6916 _mm_setzero_si128 (),
6917 (__mmask8) __U);
6918}
6919
6920extern __inline __m256i
6921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6923 __m256i __Y)
6924{
6925 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6926 (__v4di) __Y,
6927 (__v4di) __W,
6928 (__mmask8) __U);
6929}
6930
6931extern __inline __m256i
6932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6934{
6935 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6936 (__v4di) __Y,
6937 (__v4di)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) __U);
6940}
6941
6942extern __inline __m128i
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6945 __m128i __Y)
6946{
6947 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6948 (__v2di) __Y,
6949 (__v2di) __W,
6950 (__mmask8) __U);
6951}
6952
6953extern __inline __m128i
6954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6956{
6957 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6958 (__v2di) __Y,
6959 (__v2di)
a25a7887 6960 _mm_setzero_si128 (),
936c0fe4
AI
6961 (__mmask8) __U);
6962}
6963
6964extern __inline __m256i
6965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6967{
6968 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6969 (__v8si) __B,
6970 (__v8si)
6971 _mm256_setzero_si256 (),
6972 (__mmask8) -1);
6973}
6974
6975extern __inline __m256i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6978 __m256i __B)
6979{
6980 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6981 (__v8si) __B,
6982 (__v8si) __W,
6983 (__mmask8) __U);
6984}
6985
6986extern __inline __m256i
6987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6989{
6990 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6991 (__v8si) __B,
6992 (__v8si)
6993 _mm256_setzero_si256 (),
6994 (__mmask8) __U);
6995}
6996
6997extern __inline __m128i
6998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999_mm_rolv_epi32 (__m128i __A, __m128i __B)
7000{
7001 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7002 (__v4si) __B,
7003 (__v4si)
7004 _mm_setzero_si128 (),
7005 (__mmask8) -1);
7006}
7007
7008extern __inline __m128i
7009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7011 __m128i __B)
7012{
7013 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7014 (__v4si) __B,
7015 (__v4si) __W,
7016 (__mmask8) __U);
7017}
7018
7019extern __inline __m128i
7020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7022{
7023 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7024 (__v4si) __B,
7025 (__v4si)
7026 _mm_setzero_si128 (),
7027 (__mmask8) __U);
7028}
7029
7030extern __inline __m256i
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7033{
7034 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7035 (__v8si) __B,
7036 (__v8si)
7037 _mm256_setzero_si256 (),
7038 (__mmask8) -1);
7039}
7040
7041extern __inline __m256i
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7044 __m256i __B)
7045{
7046 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7047 (__v8si) __B,
7048 (__v8si) __W,
7049 (__mmask8) __U);
7050}
7051
7052extern __inline __m256i
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7055{
7056 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7057 (__v8si) __B,
7058 (__v8si)
7059 _mm256_setzero_si256 (),
7060 (__mmask8) __U);
7061}
7062
7063extern __inline __m128i
7064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065_mm_rorv_epi32 (__m128i __A, __m128i __B)
7066{
7067 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7068 (__v4si) __B,
7069 (__v4si)
7070 _mm_setzero_si128 (),
7071 (__mmask8) -1);
7072}
7073
7074extern __inline __m128i
7075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7077 __m128i __B)
7078{
7079 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7080 (__v4si) __B,
7081 (__v4si) __W,
7082 (__mmask8) __U);
7083}
7084
7085extern __inline __m128i
7086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7088{
7089 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7090 (__v4si) __B,
7091 (__v4si)
7092 _mm_setzero_si128 (),
7093 (__mmask8) __U);
7094}
7095
7096extern __inline __m256i
7097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7099{
7100 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7101 (__v4di) __B,
7102 (__v4di)
7103 _mm256_setzero_si256 (),
7104 (__mmask8) -1);
7105}
7106
7107extern __inline __m256i
7108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7110 __m256i __B)
7111{
7112 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7113 (__v4di) __B,
7114 (__v4di) __W,
7115 (__mmask8) __U);
7116}
7117
7118extern __inline __m256i
7119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7121{
7122 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7123 (__v4di) __B,
7124 (__v4di)
7125 _mm256_setzero_si256 (),
7126 (__mmask8) __U);
7127}
7128
7129extern __inline __m128i
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm_rolv_epi64 (__m128i __A, __m128i __B)
7132{
7133 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7134 (__v2di) __B,
7135 (__v2di)
a25a7887 7136 _mm_setzero_si128 (),
936c0fe4
AI
7137 (__mmask8) -1);
7138}
7139
7140extern __inline __m128i
7141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7143 __m128i __B)
7144{
7145 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7146 (__v2di) __B,
7147 (__v2di) __W,
7148 (__mmask8) __U);
7149}
7150
7151extern __inline __m128i
7152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7154{
7155 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7156 (__v2di) __B,
7157 (__v2di)
a25a7887 7158 _mm_setzero_si128 (),
936c0fe4
AI
7159 (__mmask8) __U);
7160}
7161
7162extern __inline __m256i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7165{
7166 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7167 (__v4di) __B,
7168 (__v4di)
7169 _mm256_setzero_si256 (),
7170 (__mmask8) -1);
7171}
7172
7173extern __inline __m256i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7176 __m256i __B)
7177{
7178 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7179 (__v4di) __B,
7180 (__v4di) __W,
7181 (__mmask8) __U);
7182}
7183
7184extern __inline __m256i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7187{
7188 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7189 (__v4di) __B,
7190 (__v4di)
7191 _mm256_setzero_si256 (),
7192 (__mmask8) __U);
7193}
7194
7195extern __inline __m128i
7196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197_mm_rorv_epi64 (__m128i __A, __m128i __B)
7198{
7199 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7200 (__v2di) __B,
7201 (__v2di)
a25a7887 7202 _mm_setzero_si128 (),
936c0fe4
AI
7203 (__mmask8) -1);
7204}
7205
7206extern __inline __m128i
7207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7209 __m128i __B)
7210{
7211 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7212 (__v2di) __B,
7213 (__v2di) __W,
7214 (__mmask8) __U);
7215}
7216
7217extern __inline __m128i
7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7220{
7221 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7222 (__v2di) __B,
7223 (__v2di)
a25a7887 7224 _mm_setzero_si128 (),
936c0fe4
AI
7225 (__mmask8) __U);
7226}
7227
7228extern __inline __m256i
7229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7231{
7232 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7233 (__v4di) __Y,
7234 (__v4di)
7235 _mm256_setzero_si256 (),
7236 (__mmask8) -1);
7237}
7238
7239extern __inline __m256i
7240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7241_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7242 __m256i __Y)
7243{
7244 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7245 (__v4di) __Y,
7246 (__v4di) __W,
7247 (__mmask8) __U);
7248}
7249
7250extern __inline __m256i
7251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7253{
7254 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7255 (__v4di) __Y,
7256 (__v4di)
7257 _mm256_setzero_si256 (),
7258 (__mmask8) __U);
7259}
7260
7261extern __inline __m256i
7262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7263_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7264 __m256i __B)
7265{
7266 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7267 (__v4di) __B,
7268 (__v4di) __W, __U);
7269}
7270
7271extern __inline __m256i
7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7274{
7275 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di)
7278 _mm256_setzero_pd (),
7279 __U);
7280}
7281
7282extern __inline __m128i
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7285 __m128i __B)
7286{
7287 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7288 (__v2di) __B,
7289 (__v2di) __W, __U);
7290}
7291
7292extern __inline __m128i
7293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7295{
7296 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di)
7299 _mm_setzero_pd (),
7300 __U);
7301}
7302
7303extern __inline __m256i
7304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7305_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7306 __m256i __B)
7307{
7308 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7309 (__v4di) __B,
7310 (__v4di) __W, __U);
7311}
7312
7313extern __inline __m256i
7314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7316{
7317 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di)
7320 _mm256_setzero_pd (),
7321 __U);
7322}
7323
7324extern __inline __m128i
7325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7327 __m128i __B)
7328{
7329 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7330 (__v2di) __B,
7331 (__v2di) __W, __U);
7332}
7333
7334extern __inline __m128i
7335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7337{
7338 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di)
7341 _mm_setzero_pd (),
7342 __U);
7343}
7344
7345extern __inline __m256i
7346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7347_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7348 __m256i __B)
7349{
7350 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7351 (__v4di) __B,
7352 (__v4di) __W,
7353 (__mmask8) __U);
7354}
7355
7356extern __inline __m256i
7357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7358_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7359{
7360 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di)
7363 _mm256_setzero_si256 (),
7364 (__mmask8) __U);
7365}
7366
01fd9f8d
L
7367extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7368_mm256_or_epi64 (__m256i __A, __m256i __B)
7369{
7370 return (__m256i) ((__v4du)__A | (__v4du)__B);
7371}
7372
936c0fe4
AI
7373extern __inline __m128i
7374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7376{
7377 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7378 (__v2di) __B,
7379 (__v2di) __W,
7380 (__mmask8) __U);
7381}
7382
7383extern __inline __m128i
7384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7385_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7386{
7387 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7388 (__v2di) __B,
7389 (__v2di)
7390 _mm_setzero_si128 (),
7391 (__mmask8) __U);
7392}
7393
01fd9f8d
L
7394extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7395_mm_or_epi64 (__m128i __A, __m128i __B)
7396{
7397 return (__m128i) ((__v2du)__A | (__v2du)__B);
7398}
7399
936c0fe4
AI
7400extern __inline __m256i
7401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7403 __m256i __B)
7404{
7405 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7406 (__v4di) __B,
7407 (__v4di) __W,
7408 (__mmask8) __U);
7409}
7410
7411extern __inline __m256i
7412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7414{
7415 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7416 (__v4di) __B,
7417 (__v4di)
7418 _mm256_setzero_si256 (),
7419 (__mmask8) __U);
7420}
7421
01fd9f8d
L
7422extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423_mm256_xor_epi64 (__m256i __A, __m256i __B)
7424{
7425 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7426}
7427
936c0fe4
AI
7428extern __inline __m128i
7429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7431 __m128i __B)
7432{
7433 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7434 (__v2di) __B,
7435 (__v2di) __W,
7436 (__mmask8) __U);
7437}
7438
7439extern __inline __m128i
7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7442{
7443 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7444 (__v2di) __B,
7445 (__v2di)
7446 _mm_setzero_si128 (),
7447 (__mmask8) __U);
7448}
7449
01fd9f8d
L
7450extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451_mm_xor_epi64 (__m128i __A, __m128i __B)
7452{
7453 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7454}
7455
936c0fe4
AI
7456extern __inline __m256d
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7459 __m256d __B)
7460{
7461 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7462 (__v4df) __B,
7463 (__v4df) __W,
7464 (__mmask8) __U);
7465}
7466
7467extern __inline __m256d
7468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7470{
7471 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7472 (__v4df) __B,
7473 (__v4df)
7474 _mm256_setzero_pd (),
7475 (__mmask8) __U);
7476}
7477
7478extern __inline __m256
7479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7481{
7482 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7483 (__v8sf) __B,
7484 (__v8sf) __W,
7485 (__mmask8) __U);
7486}
7487
7488extern __inline __m256
7489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7491{
7492 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7493 (__v8sf) __B,
7494 (__v8sf)
7495 _mm256_setzero_ps (),
7496 (__mmask8) __U);
7497}
7498
7499extern __inline __m128
7500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7502{
7503 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7504 (__v4sf) __B,
7505 (__v4sf) __W,
7506 (__mmask8) __U);
7507}
7508
7509extern __inline __m128
7510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7512{
7513 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7514 (__v4sf) __B,
7515 (__v4sf)
7516 _mm_setzero_ps (),
7517 (__mmask8) __U);
7518}
7519
7520extern __inline __m128d
7521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7523{
7524 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7525 (__v2df) __B,
7526 (__v2df) __W,
7527 (__mmask8) __U);
7528}
7529
7530extern __inline __m128d
7531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7533{
7534 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7535 (__v2df) __B,
7536 (__v2df)
7537 _mm_setzero_pd (),
7538 (__mmask8) __U);
7539}
7540
7541extern __inline __m256d
7542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7544 __m256d __B)
7545{
7546 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7547 (__v4df) __B,
7548 (__v4df) __W,
7549 (__mmask8) __U);
7550}
7551
7552extern __inline __m256d
7553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7554_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7555 __m256d __B)
7556{
7557 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7558 (__v4df) __B,
7559 (__v4df) __W,
7560 (__mmask8) __U);
7561}
7562
7563extern __inline __m256d
7564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7566{
7567 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7568 (__v4df) __B,
7569 (__v4df)
7570 _mm256_setzero_pd (),
7571 (__mmask8) __U);
7572}
7573
7574extern __inline __m256
7575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7577{
7578 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7579 (__v8sf) __B,
7580 (__v8sf) __W,
7581 (__mmask8) __U);
7582}
7583
7584extern __inline __m256d
7585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7586_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7587{
7588 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7589 (__v4df) __B,
7590 (__v4df)
7591 _mm256_setzero_pd (),
7592 (__mmask8) __U);
7593}
7594
7595extern __inline __m256
7596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7598{
7599 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7600 (__v8sf) __B,
7601 (__v8sf) __W,
7602 (__mmask8) __U);
7603}
7604
7605extern __inline __m256
7606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7608{
7609 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7610 (__v8sf) __B,
7611 (__v8sf)
7612 _mm256_setzero_ps (),
7613 (__mmask8) __U);
7614}
7615
7616extern __inline __m256
7617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7619{
7620 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7621 (__v8sf) __B,
7622 (__v8sf)
7623 _mm256_setzero_ps (),
7624 (__mmask8) __U);
7625}
7626
7627extern __inline __m128
7628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7630{
7631 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7632 (__v4sf) __B,
7633 (__v4sf) __W,
7634 (__mmask8) __U);
7635}
7636
7637extern __inline __m128
7638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7640{
7641 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7642 (__v4sf) __B,
7643 (__v4sf) __W,
7644 (__mmask8) __U);
7645}
7646
7647extern __inline __m128
7648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7649_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7650{
7651 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7652 (__v4sf) __B,
7653 (__v4sf)
7654 _mm_setzero_ps (),
7655 (__mmask8) __U);
7656}
7657
7658extern __inline __m128
7659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7661{
7662 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7663 (__v4sf) __B,
7664 (__v4sf)
7665 _mm_setzero_ps (),
7666 (__mmask8) __U);
7667}
7668
7669extern __inline __m128
7670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7671_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7672{
7673 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7674 (__v4sf) __B,
7675 (__v4sf) __W,
7676 (__mmask8) __U);
7677}
7678
7679extern __inline __m128
7680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7681_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7682{
7683 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7684 (__v4sf) __B,
7685 (__v4sf)
7686 _mm_setzero_ps (),
7687 (__mmask8) __U);
7688}
7689
7690extern __inline __m128d
7691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7692_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7693{
7694 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7695 (__v2df) __B,
7696 (__v2df) __W,
7697 (__mmask8) __U);
7698}
7699
7700extern __inline __m128d
7701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7702_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7703{
7704 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7705 (__v2df) __B,
7706 (__v2df)
7707 _mm_setzero_pd (),
7708 (__mmask8) __U);
7709}
7710
7711extern __inline __m128d
7712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7713_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7714{
7715 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7716 (__v2df) __B,
7717 (__v2df) __W,
7718 (__mmask8) __U);
7719}
7720
7721extern __inline __m128d
7722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7724{
7725 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7726 (__v2df) __B,
7727 (__v2df)
7728 _mm_setzero_pd (),
7729 (__mmask8) __U);
7730}
7731
7732extern __inline __m128d
7733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7734_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7735{
7736 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7737 (__v2df) __B,
7738 (__v2df) __W,
7739 (__mmask8) __U);
7740}
7741
7742extern __inline __m128d
7743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7745{
7746 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7747 (__v2df) __B,
7748 (__v2df)
7749 _mm_setzero_pd (),
7750 (__mmask8) __U);
7751}
7752
7753extern __inline __m256
7754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7756{
7757 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7758 (__v8sf) __B,
7759 (__v8sf) __W,
7760 (__mmask8) __U);
7761}
7762
7763extern __inline __m256
7764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7766{
7767 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7768 (__v8sf) __B,
7769 (__v8sf)
7770 _mm256_setzero_ps (),
7771 (__mmask8) __U);
7772}
7773
7774extern __inline __m256d
7775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7776_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7777 __m256d __B)
7778{
7779 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7780 (__v4df) __B,
7781 (__v4df) __W,
7782 (__mmask8) __U);
7783}
7784
7785extern __inline __m256d
7786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7788{
7789 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7790 (__v4df) __B,
7791 (__v4df)
7792 _mm256_setzero_pd (),
7793 (__mmask8) __U);
7794}
7795
7796extern __inline __m256i
7797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7799{
7800 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7801 (__v4di) __B,
7802 (__v4di)
7803 _mm256_setzero_si256 (),
7804 __M);
7805}
7806
7807extern __inline __m256i
7808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7810 __m256i __B)
7811{
7812 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7813 (__v4di) __B,
7814 (__v4di) __W, __M);
7815}
7816
7817extern __inline __m256i
7818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819_mm256_min_epi64 (__m256i __A, __m256i __B)
7820{
7821 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7822 (__v4di) __B,
7823 (__v4di)
7824 _mm256_setzero_si256 (),
7825 (__mmask8) -1);
7826}
7827
7828extern __inline __m256i
7829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7831 __m256i __B)
7832{
7833 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7834 (__v4di) __B,
7835 (__v4di) __W, __M);
7836}
7837
7838extern __inline __m256i
7839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7841{
7842 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7843 (__v4di) __B,
7844 (__v4di)
7845 _mm256_setzero_si256 (),
7846 __M);
7847}
7848
7849extern __inline __m256i
7850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7852{
7853 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7854 (__v4di) __B,
7855 (__v4di)
7856 _mm256_setzero_si256 (),
7857 __M);
7858}
7859
7860extern __inline __m256i
7861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862_mm256_max_epi64 (__m256i __A, __m256i __B)
7863{
7864 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7865 (__v4di) __B,
7866 (__v4di)
7867 _mm256_setzero_si256 (),
7868 (__mmask8) -1);
7869}
7870
7871extern __inline __m256i
7872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873_mm256_max_epu64 (__m256i __A, __m256i __B)
7874{
7875 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7876 (__v4di) __B,
7877 (__v4di)
7878 _mm256_setzero_si256 (),
7879 (__mmask8) -1);
7880}
7881
7882extern __inline __m256i
7883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7885 __m256i __B)
7886{
7887 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7888 (__v4di) __B,
7889 (__v4di) __W, __M);
7890}
7891
7892extern __inline __m256i
7893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894_mm256_min_epu64 (__m256i __A, __m256i __B)
7895{
7896 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7897 (__v4di) __B,
7898 (__v4di)
7899 _mm256_setzero_si256 (),
7900 (__mmask8) -1);
7901}
7902
7903extern __inline __m256i
7904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7906 __m256i __B)
7907{
7908 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7909 (__v4di) __B,
7910 (__v4di) __W, __M);
7911}
7912
7913extern __inline __m256i
7914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7916{
7917 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7918 (__v4di) __B,
7919 (__v4di)
7920 _mm256_setzero_si256 (),
7921 __M);
7922}
7923
7924extern __inline __m256i
7925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7927{
7928 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7929 (__v8si) __B,
7930 (__v8si)
7931 _mm256_setzero_si256 (),
7932 __M);
7933}
7934
7935extern __inline __m256i
7936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7938 __m256i __B)
7939{
7940 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7941 (__v8si) __B,
7942 (__v8si) __W, __M);
7943}
7944
7945extern __inline __m256i
7946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7948{
7949 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7950 (__v8si) __B,
7951 (__v8si)
7952 _mm256_setzero_si256 (),
7953 __M);
7954}
7955
7956extern __inline __m256i
7957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7958_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7959 __m256i __B)
7960{
7961 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7962 (__v8si) __B,
7963 (__v8si) __W, __M);
7964}
7965
7966extern __inline __m256i
7967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7968_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7969{
7970 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7971 (__v8si) __B,
7972 (__v8si)
7973 _mm256_setzero_si256 (),
7974 __M);
7975}
7976
7977extern __inline __m256i
7978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7979_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7980 __m256i __B)
7981{
7982 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7983 (__v8si) __B,
7984 (__v8si) __W, __M);
7985}
7986
7987extern __inline __m256i
7988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7989_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7990{
7991 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7992 (__v8si) __B,
7993 (__v8si)
7994 _mm256_setzero_si256 (),
7995 __M);
7996}
7997
7998extern __inline __m256i
7999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8001 __m256i __B)
8002{
8003 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8004 (__v8si) __B,
8005 (__v8si) __W, __M);
8006}
8007
8008extern __inline __m128i
8009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8011{
8012 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8013 (__v2di) __B,
8014 (__v2di)
8015 _mm_setzero_si128 (),
8016 __M);
8017}
8018
8019extern __inline __m128i
8020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8021_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8022 __m128i __B)
8023{
8024 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8025 (__v2di) __B,
8026 (__v2di) __W, __M);
8027}
8028
8029extern __inline __m128i
8030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031_mm_min_epi64 (__m128i __A, __m128i __B)
8032{
8033 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8034 (__v2di) __B,
8035 (__v2di)
a25a7887 8036 _mm_setzero_si128 (),
936c0fe4
AI
8037 (__mmask8) -1);
8038}
8039
8040extern __inline __m128i
8041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8043 __m128i __B)
8044{
8045 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8046 (__v2di) __B,
8047 (__v2di) __W, __M);
8048}
8049
8050extern __inline __m128i
8051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8052_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8053{
8054 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8055 (__v2di) __B,
8056 (__v2di)
8057 _mm_setzero_si128 (),
8058 __M);
8059}
8060
8061extern __inline __m128i
8062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8064{
8065 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8066 (__v2di) __B,
8067 (__v2di)
8068 _mm_setzero_si128 (),
8069 __M);
8070}
8071
8072extern __inline __m128i
8073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8074_mm_max_epi64 (__m128i __A, __m128i __B)
8075{
8076 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8077 (__v2di) __B,
8078 (__v2di)
a25a7887 8079 _mm_setzero_si128 (),
936c0fe4
AI
8080 (__mmask8) -1);
8081}
8082
8083extern __inline __m128i
8084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8085_mm_max_epu64 (__m128i __A, __m128i __B)
8086{
8087 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8088 (__v2di) __B,
8089 (__v2di)
a25a7887 8090 _mm_setzero_si128 (),
936c0fe4
AI
8091 (__mmask8) -1);
8092}
8093
8094extern __inline __m128i
8095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8097 __m128i __B)
8098{
8099 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8100 (__v2di) __B,
8101 (__v2di) __W, __M);
8102}
8103
8104extern __inline __m128i
8105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106_mm_min_epu64 (__m128i __A, __m128i __B)
8107{
8108 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8109 (__v2di) __B,
8110 (__v2di)
a25a7887 8111 _mm_setzero_si128 (),
936c0fe4
AI
8112 (__mmask8) -1);
8113}
8114
8115extern __inline __m128i
8116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8118 __m128i __B)
8119{
8120 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8121 (__v2di) __B,
8122 (__v2di) __W, __M);
8123}
8124
8125extern __inline __m128i
8126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8128{
8129 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8130 (__v2di) __B,
8131 (__v2di)
8132 _mm_setzero_si128 (),
8133 __M);
8134}
8135
8136extern __inline __m128i
8137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8139{
8140 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8141 (__v4si) __B,
8142 (__v4si)
8143 _mm_setzero_si128 (),
8144 __M);
8145}
8146
8147extern __inline __m128i
8148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8149_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8150 __m128i __B)
8151{
8152 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8153 (__v4si) __B,
8154 (__v4si) __W, __M);
8155}
8156
8157extern __inline __m128i
8158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8160{
8161 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8162 (__v4si) __B,
8163 (__v4si)
8164 _mm_setzero_si128 (),
8165 __M);
8166}
8167
8168extern __inline __m128i
8169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8171 __m128i __B)
8172{
8173 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8174 (__v4si) __B,
8175 (__v4si) __W, __M);
8176}
8177
8178extern __inline __m128i
8179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8181{
8182 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8183 (__v4si) __B,
8184 (__v4si)
8185 _mm_setzero_si128 (),
8186 __M);
8187}
8188
8189extern __inline __m128i
8190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8192 __m128i __B)
8193{
8194 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8195 (__v4si) __B,
8196 (__v4si) __W, __M);
8197}
8198
8199extern __inline __m128i
8200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8202{
8203 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8204 (__v4si) __B,
8205 (__v4si)
8206 _mm_setzero_si128 (),
8207 __M);
8208}
8209
8210extern __inline __m128i
8211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8213 __m128i __B)
8214{
8215 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8216 (__v4si) __B,
8217 (__v4si) __W, __M);
8218}
8219
/* If __AVX512CD__ is not defined, save the current target options and
   switch to "avx512vl,avx512cd" for the definitions that follow.
   __DISABLE_AVX512VLCD__ records that the options were pushed here.  */
#if !defined (__AVX512CD__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* !__AVX512CD__ */
8225
8226extern __inline __m128i
8227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228_mm_broadcastmb_epi64 (__mmask8 __A)
8229{
8230 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8231}
8232
8233extern __inline __m256i
8234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8235_mm256_broadcastmb_epi64 (__mmask8 __A)
8236{
8237 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8238}
8239
8240extern __inline __m128i
8241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8242_mm_broadcastmw_epi32 (__mmask16 __A)
8243{
8244 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8245}
8246
8247extern __inline __m256i
8248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249_mm256_broadcastmw_epi32 (__mmask16 __A)
8250{
8251 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8252}
8253
8254extern __inline __m256i
8255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256_mm256_lzcnt_epi32 (__m256i __A)
8257{
8258 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8259 (__v8si)
8260 _mm256_setzero_si256 (),
8261 (__mmask8) -1);
8262}
8263
8264extern __inline __m256i
8265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8267{
8268 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8269 (__v8si) __W,
8270 (__mmask8) __U);
8271}
8272
8273extern __inline __m256i
8274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8276{
8277 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8278 (__v8si)
8279 _mm256_setzero_si256 (),
8280 (__mmask8) __U);
8281}
8282
8283extern __inline __m256i
8284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285_mm256_lzcnt_epi64 (__m256i __A)
8286{
8287 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8288 (__v4di)
8289 _mm256_setzero_si256 (),
8290 (__mmask8) -1);
8291}
8292
8293extern __inline __m256i
8294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8296{
8297 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8298 (__v4di) __W,
8299 (__mmask8) __U);
8300}
8301
8302extern __inline __m256i
8303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8305{
8306 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8307 (__v4di)
8308 _mm256_setzero_si256 (),
8309 (__mmask8) __U);
8310}
8311
8312extern __inline __m256i
8313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314_mm256_conflict_epi64 (__m256i __A)
8315{
8316 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8317 (__v4di)
8318 _mm256_setzero_si256 (),
c42b0bdf 8319 (__mmask8) -1);
936c0fe4
AI
8320}
8321
8322extern __inline __m256i
8323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8325{
8326 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8327 (__v4di) __W,
8328 (__mmask8)
8329 __U);
8330}
8331
8332extern __inline __m256i
8333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8335{
8336 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8337 (__v4di)
8338 _mm256_setzero_si256 (),
8339 (__mmask8)
8340 __U);
8341}
8342
8343extern __inline __m256i
8344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345_mm256_conflict_epi32 (__m256i __A)
8346{
8347 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8348 (__v8si)
8349 _mm256_setzero_si256 (),
c42b0bdf 8350 (__mmask8) -1);
936c0fe4
AI
8351}
8352
8353extern __inline __m256i
8354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8356{
8357 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8358 (__v8si) __W,
8359 (__mmask8)
8360 __U);
8361}
8362
8363extern __inline __m256i
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8366{
8367 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8368 (__v8si)
8369 _mm256_setzero_si256 (),
8370 (__mmask8)
8371 __U);
8372}
8373
8374extern __inline __m128i
8375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376_mm_lzcnt_epi32 (__m128i __A)
8377{
8378 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8379 (__v4si)
8380 _mm_setzero_si128 (),
8381 (__mmask8) -1);
8382}
8383
8384extern __inline __m128i
8385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8387{
8388 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8389 (__v4si) __W,
8390 (__mmask8) __U);
8391}
8392
8393extern __inline __m128i
8394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8395_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8396{
8397 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8398 (__v4si)
8399 _mm_setzero_si128 (),
8400 (__mmask8) __U);
8401}
8402
8403extern __inline __m128i
8404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8405_mm_lzcnt_epi64 (__m128i __A)
8406{
8407 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8408 (__v2di)
a25a7887 8409 _mm_setzero_si128 (),
936c0fe4
AI
8410 (__mmask8) -1);
8411}
8412
8413extern __inline __m128i
8414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8415_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8416{
8417 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8418 (__v2di) __W,
8419 (__mmask8) __U);
8420}
8421
8422extern __inline __m128i
8423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8425{
8426 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8427 (__v2di)
a25a7887 8428 _mm_setzero_si128 (),
936c0fe4
AI
8429 (__mmask8) __U);
8430}
8431
8432extern __inline __m128i
8433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8434_mm_conflict_epi64 (__m128i __A)
8435{
8436 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8437 (__v2di)
a25a7887 8438 _mm_setzero_si128 (),
c42b0bdf 8439 (__mmask8) -1);
936c0fe4
AI
8440}
8441
8442extern __inline __m128i
8443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8444_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8445{
8446 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8447 (__v2di) __W,
8448 (__mmask8)
8449 __U);
8450}
8451
8452extern __inline __m128i
8453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8454_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8455{
8456 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8457 (__v2di)
a25a7887 8458 _mm_setzero_si128 (),
936c0fe4
AI
8459 (__mmask8)
8460 __U);
8461}
8462
8463extern __inline __m128i
8464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465_mm_conflict_epi32 (__m128i __A)
8466{
8467 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8468 (__v4si)
8469 _mm_setzero_si128 (),
c42b0bdf 8470 (__mmask8) -1);
936c0fe4
AI
8471}
8472
8473extern __inline __m128i
8474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8476{
8477 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8478 (__v4si) __W,
8479 (__mmask8)
8480 __U);
8481}
8482
8483extern __inline __m128i
8484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8486{
8487 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8488 (__v4si)
8489 _mm_setzero_si128 (),
8490 (__mmask8)
8491 __U);
8492}
8493
/* Pop the saved target options, but only if __DISABLE_AVX512VLCD__ is
   defined.  */
#if defined (__DISABLE_AVX512VLCD__)
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8497
8498extern __inline __m256d
8499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8501 __m256d __B)
8502{
8503 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8504 (__v4df) __B,
8505 (__v4df) __W,
8506 (__mmask8) __U);
8507}
8508
8509extern __inline __m256d
8510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8512{
8513 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8514 (__v4df) __B,
8515 (__v4df)
8516 _mm256_setzero_pd (),
8517 (__mmask8) __U);
8518}
8519
8520extern __inline __m128d
8521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8523 __m128d __B)
8524{
8525 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8526 (__v2df) __B,
8527 (__v2df) __W,
8528 (__mmask8) __U);
8529}
8530
8531extern __inline __m128d
8532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8534{
8535 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8536 (__v2df) __B,
8537 (__v2df)
8538 _mm_setzero_pd (),
8539 (__mmask8) __U);
8540}
8541
8542extern __inline __m256
8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8545 __m256 __B)
8546{
8547 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8548 (__v8sf) __B,
8549 (__v8sf) __W,
8550 (__mmask8) __U);
8551}
8552
8553extern __inline __m256d
8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8556 __m256d __B)
8557{
8558 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8559 (__v4df) __B,
8560 (__v4df) __W,
8561 (__mmask8) __U);
8562}
8563
8564extern __inline __m256d
8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8567{
8568 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8569 (__v4df) __B,
8570 (__v4df)
8571 _mm256_setzero_pd (),
8572 (__mmask8) __U);
8573}
8574
8575extern __inline __m128d
8576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8578 __m128d __B)
8579{
8580 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8581 (__v2df) __B,
8582 (__v2df) __W,
8583 (__mmask8) __U);
8584}
8585
8586extern __inline __m128d
8587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8588_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8589{
8590 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8591 (__v2df) __B,
8592 (__v2df)
8593 _mm_setzero_pd (),
8594 (__mmask8) __U);
8595}
8596
8597extern __inline __m256
8598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8600 __m256 __B)
8601{
8602 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8603 (__v8sf) __B,
8604 (__v8sf) __W,
8605 (__mmask8) __U);
8606}
8607
8608extern __inline __m256
8609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8610_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8611{
8612 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8613 (__v8sf) __B,
8614 (__v8sf)
8615 _mm256_setzero_ps (),
8616 (__mmask8) __U);
8617}
8618
8619extern __inline __m128
8620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8622{
8623 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8624 (__v4sf) __B,
8625 (__v4sf) __W,
8626 (__mmask8) __U);
8627}
8628
8629extern __inline __m128
8630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8632{
8633 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8634 (__v4sf) __B,
8635 (__v4sf)
8636 _mm_setzero_ps (),
8637 (__mmask8) __U);
8638}
8639
8640extern __inline __m128
8641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8643{
8644 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8645 (__v4sf) __W,
8646 (__mmask8) __U);
8647}
8648
8649extern __inline __m128
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8652{
8653 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8654 (__v4sf)
8655 _mm_setzero_ps (),
8656 (__mmask8) __U);
8657}
8658
8659extern __inline __m256
8660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8661_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8662{
8663 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8664 (__v8sf) __B,
8665 (__v8sf)
8666 _mm256_setzero_ps (),
8667 (__mmask8) __U);
8668}
8669
8670extern __inline __m256
8671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8673{
8674 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8675 (__v8sf) __W,
8676 (__mmask8) __U);
8677}
8678
8679extern __inline __m256
8680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8682{
8683 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8684 (__v8sf)
8685 _mm256_setzero_ps (),
8686 (__mmask8) __U);
8687}
8688
8689extern __inline __m128
8690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8691_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8692{
8693 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8694 (__v4sf) __B,
8695 (__v4sf) __W,
8696 (__mmask8) __U);
8697}
8698
8699extern __inline __m128
8700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8701_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8702{
8703 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8704 (__v4sf) __B,
8705 (__v4sf)
8706 _mm_setzero_ps (),
8707 (__mmask8) __U);
8708}
8709
8710extern __inline __m256i
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8713 __m128i __B)
8714{
8715 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8716 (__v4si) __B,
8717 (__v8si) __W,
8718 (__mmask8) __U);
8719}
8720
8721extern __inline __m256i
8722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8724{
8725 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8726 (__v4si) __B,
8727 (__v8si)
8728 _mm256_setzero_si256 (),
8729 (__mmask8) __U);
8730}
8731
8732extern __inline __m128i
8733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8735 __m128i __B)
8736{
8737 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8738 (__v4si) __B,
8739 (__v4si) __W,
8740 (__mmask8) __U);
8741}
8742
8743extern __inline __m128i
8744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8745_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8746{
8747 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8748 (__v4si) __B,
8749 (__v4si)
8750 _mm_setzero_si128 (),
8751 (__mmask8) __U);
8752}
8753
8754extern __inline __m256i
8755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756_mm256_sra_epi64 (__m256i __A, __m128i __B)
8757{
8758 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8759 (__v2di) __B,
8760 (__v4di)
8761 _mm256_setzero_si256 (),
8762 (__mmask8) -1);
8763}
8764
8765extern __inline __m256i
8766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8768 __m128i __B)
8769{
8770 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8771 (__v2di) __B,
8772 (__v4di) __W,
8773 (__mmask8) __U);
8774}
8775
8776extern __inline __m256i
8777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8778_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8779{
8780 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8781 (__v2di) __B,
8782 (__v4di)
8783 _mm256_setzero_si256 (),
8784 (__mmask8) __U);
8785}
8786
8787extern __inline __m128i
8788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789_mm_sra_epi64 (__m128i __A, __m128i __B)
8790{
8791 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8792 (__v2di) __B,
8793 (__v2di)
a25a7887 8794 _mm_setzero_si128 (),
936c0fe4
AI
8795 (__mmask8) -1);
8796}
8797
8798extern __inline __m128i
8799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8801 __m128i __B)
8802{
8803 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8804 (__v2di) __B,
8805 (__v2di) __W,
8806 (__mmask8) __U);
8807}
8808
8809extern __inline __m128i
8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8812{
8813 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8814 (__v2di) __B,
8815 (__v2di)
a25a7887 8816 _mm_setzero_si128 (),
936c0fe4
AI
8817 (__mmask8) __U);
8818}
8819
8820extern __inline __m128i
8821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8822_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8823 __m128i __B)
8824{
8825 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8826 (__v4si) __B,
8827 (__v4si) __W,
8828 (__mmask8) __U);
8829}
8830
8831extern __inline __m128i
8832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8833_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8834{
8835 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8836 (__v4si) __B,
8837 (__v4si)
8838 _mm_setzero_si128 (),
8839 (__mmask8) __U);
8840}
8841
8842extern __inline __m128i
8843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8844_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8845 __m128i __B)
8846{
8847 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8848 (__v2di) __B,
8849 (__v2di) __W,
8850 (__mmask8) __U);
8851}
8852
8853extern __inline __m128i
8854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8856{
8857 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8858 (__v2di) __B,
8859 (__v2di)
a25a7887 8860 _mm_setzero_si128 (),
936c0fe4
AI
8861 (__mmask8) __U);
8862}
8863
8864extern __inline __m256i
8865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8867 __m128i __B)
8868{
8869 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8870 (__v4si) __B,
8871 (__v8si) __W,
8872 (__mmask8) __U);
8873}
8874
8875extern __inline __m256i
8876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8877_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8878{
8879 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8880 (__v4si) __B,
8881 (__v8si)
8882 _mm256_setzero_si256 (),
8883 (__mmask8) __U);
8884}
8885
8886extern __inline __m256i
8887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8889 __m128i __B)
8890{
8891 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8892 (__v2di) __B,
8893 (__v4di) __W,
8894 (__mmask8) __U);
8895}
8896
8897extern __inline __m256i
8898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8899_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8900{
8901 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8902 (__v2di) __B,
8903 (__v4di)
8904 _mm256_setzero_si256 (),
8905 (__mmask8) __U);
8906}
8907
8908extern __inline __m256
8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8911 __m256 __Y)
8912{
8913 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8914 (__v8si) __X,
8915 (__v8sf) __W,
8916 (__mmask8) __U);
8917}
8918
8919extern __inline __m256
8920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8921_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8922{
8923 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8924 (__v8si) __X,
8925 (__v8sf)
8926 _mm256_setzero_ps (),
8927 (__mmask8) __U);
8928}
8929
8930extern __inline __m256d
8931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8933{
8934 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8935 (__v4di) __X,
8936 (__v4df)
8937 _mm256_setzero_pd (),
8938 (__mmask8) -1);
8939}
8940
8941extern __inline __m256d
8942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8944 __m256d __Y)
8945{
8946 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8947 (__v4di) __X,
8948 (__v4df) __W,
8949 (__mmask8) __U);
8950}
8951
8952extern __inline __m256d
8953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8955{
8956 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8957 (__v4di) __X,
8958 (__v4df)
8959 _mm256_setzero_pd (),
8960 (__mmask8) __U);
8961}
8962
8963extern __inline __m256d
8964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8965_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8966 __m256i __C)
8967{
8968 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8969 (__v4di) __C,
8970 (__v4df) __W,
8971 (__mmask8)
8972 __U);
8973}
8974
8975extern __inline __m256d
8976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8978{
8979 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8980 (__v4di) __C,
8981 (__v4df)
8982 _mm256_setzero_pd (),
8983 (__mmask8)
8984 __U);
8985}
8986
8987extern __inline __m256
8988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8990 __m256i __C)
8991{
8992 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8993 (__v8si) __C,
8994 (__v8sf) __W,
8995 (__mmask8) __U);
8996}
8997
8998extern __inline __m256
8999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9001{
9002 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9003 (__v8si) __C,
9004 (__v8sf)
9005 _mm256_setzero_ps (),
9006 (__mmask8) __U);
9007}
9008
9009extern __inline __m128d
9010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9012 __m128i __C)
9013{
9014 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9015 (__v2di) __C,
9016 (__v2df) __W,
9017 (__mmask8) __U);
9018}
9019
9020extern __inline __m128d
9021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9023{
9024 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9025 (__v2di) __C,
9026 (__v2df)
9027 _mm_setzero_pd (),
9028 (__mmask8) __U);
9029}
9030
9031extern __inline __m128
9032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9034 __m128i __C)
9035{
9036 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9037 (__v4si) __C,
9038 (__v4sf) __W,
9039 (__mmask8) __U);
9040}
9041
9042extern __inline __m128
9043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9045{
9046 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9047 (__v4si) __C,
9048 (__v4sf)
9049 _mm_setzero_ps (),
9050 (__mmask8) __U);
9051}
9052
9053extern __inline __m256i
9054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9056{
9057 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9058 (__v8si) __B,
9059 (__v8si)
9060 _mm256_setzero_si256 (),
9061 __M);
9062}
9063
9064extern __inline __m256i
9065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9067{
9068 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9069 (__v4di) __X,
9070 (__v4di)
9071 _mm256_setzero_si256 (),
9072 __M);
9073}
9074
9075extern __inline __m256i
9076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9077_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9078 __m256i __B)
9079{
9080 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9081 (__v8si) __B,
9082 (__v8si) __W, __M);
9083}
9084
9085extern __inline __m128i
9086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9087_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9088{
9089 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9090 (__v4si) __B,
9091 (__v4si)
9092 _mm_setzero_si128 (),
9093 __M);
9094}
9095
9096extern __inline __m128i
9097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
20e363e4 9098_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
936c0fe4
AI
9099 __m128i __B)
9100{
9101 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9102 (__v4si) __B,
9103 (__v4si) __W, __M);
9104}
9105
9106extern __inline __m256i
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9110{
9111 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9114}
9115
9116extern __inline __m256i
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119{
9120 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9121 (__v8si) __Y,
9122 (__v4di)
9123 _mm256_setzero_si256 (),
9124 __M);
9125}
9126
9127extern __inline __m128i
9128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9130 __m128i __Y)
9131{
9132 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9133 (__v4si) __Y,
9134 (__v2di) __W, __M);
9135}
9136
9137extern __inline __m128i
9138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9140{
9141 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9142 (__v4si) __Y,
9143 (__v2di)
9144 _mm_setzero_si128 (),
9145 __M);
9146}
9147
395a191d
SP
9148extern __inline __m256i
9149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9151{
9152 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9153 (__v4di) __X,
9154 (__v4di)
9155 _mm256_setzero_si256 (),
9156 (__mmask8) -1);
9157}
9158
936c0fe4
AI
9159extern __inline __m256i
9160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9163{
9164 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9165 (__v4di) __X,
9166 (__v4di) __W,
9167 __M);
9168}
9169
9170extern __inline __m256i
9171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9172_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9173 __m256i __Y)
9174{
9175 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9176 (__v8si) __Y,
9177 (__v4di) __W, __M);
9178}
9179
9180extern __inline __m256i
9181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9182_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9183{
9184 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9185 (__v8si) __X,
9186 (__v8si)
9187 _mm256_setzero_si256 (),
9188 __M);
9189}
9190
9191extern __inline __m256i
9192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9193_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9194{
9195 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9196 (__v8si) __Y,
9197 (__v4di)
9198 _mm256_setzero_si256 (),
9199 __M);
9200}
9201
9202extern __inline __m128i
9203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9204_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9205 __m128i __Y)
9206{
9207 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9208 (__v4si) __Y,
9209 (__v2di) __W, __M);
9210}
9211
9212extern __inline __m128i
9213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9215{
9216 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9217 (__v4si) __Y,
9218 (__v2di)
9219 _mm_setzero_si128 (),
9220 __M);
9221}
9222
395a191d
SP
9223extern __inline __m256i
9224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9226{
9227 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9228 (__v8si) __X,
9229 (__v8si)
9230 _mm256_setzero_si256 (),
9231 (__mmask8) -1);
9232}
9233
936c0fe4
AI
9234extern __inline __m256i
9235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9237 __m256i __Y)
9238{
9239 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9240 (__v8si) __X,
9241 (__v8si) __W,
9242 __M);
9243}
9244
6b62f323
JJ
9245extern __inline __mmask8
9246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9247_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9248{
6b62f323
JJ
9249 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9250 (__v8si) __Y, 4,
936c0fe4
AI
9251 (__mmask8) __M);
9252}
9253
6b62f323
JJ
9254extern __inline __mmask8
9255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9257{
6b62f323
JJ
9258 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9259 (__v8si) __Y, 4,
9260 (__mmask8) -1);
936c0fe4
AI
9261}
9262
6b62f323
JJ
9263extern __inline __mmask8
9264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9265_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9266{
6b62f323
JJ
9267 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9268 (__v8si) __Y, 1,
9269 (__mmask8) __M);
936c0fe4
AI
9270}
9271
6b62f323
JJ
9272extern __inline __mmask8
9273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9274_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9275{
6b62f323
JJ
9276 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9277 (__v8si) __Y, 1,
9278 (__mmask8) -1);
936c0fe4
AI
9279}
9280
6b62f323
JJ
9281extern __inline __mmask8
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9284{
6b62f323
JJ
9285 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9286 (__v8si) __Y, 5,
9287 (__mmask8) __M);
936c0fe4
AI
9288}
9289
6b62f323
JJ
9290extern __inline __mmask8
9291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9293{
6b62f323
JJ
9294 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9295 (__v8si) __Y, 5,
9296 (__mmask8) -1);
936c0fe4
AI
9297}
9298
6b62f323
JJ
9299extern __inline __mmask8
9300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9301_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9302{
6b62f323
JJ
9303 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9304 (__v8si) __Y, 2,
9305 (__mmask8) __M);
936c0fe4
AI
9306}
9307
6b62f323
JJ
9308extern __inline __mmask8
9309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9310_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9311{
6b62f323
JJ
9312 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9313 (__v8si) __Y, 2,
9314 (__mmask8) -1);
936c0fe4
AI
9315}
9316
6b62f323
JJ
9317extern __inline __mmask8
9318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9320{
6b62f323
JJ
9321 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9322 (__v4di) __Y, 4,
9323 (__mmask8) __M);
936c0fe4
AI
9324}
9325
6b62f323
JJ
9326extern __inline __mmask8
9327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9328_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9329{
6b62f323
JJ
9330 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9331 (__v4di) __Y, 4,
9332 (__mmask8) -1);
936c0fe4
AI
9333}
9334
6b62f323
JJ
9335extern __inline __mmask8
9336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9337_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9338{
6b62f323
JJ
9339 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9340 (__v4di) __Y, 1,
9341 (__mmask8) __M);
936c0fe4
AI
9342}
9343
6b62f323
JJ
9344extern __inline __mmask8
9345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9347{
6b62f323
JJ
9348 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9349 (__v4di) __Y, 1,
9350 (__mmask8) -1);
936c0fe4
AI
9351}
9352
6b62f323
JJ
9353extern __inline __mmask8
9354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9356{
6b62f323
JJ
9357 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9358 (__v4di) __Y, 5,
9359 (__mmask8) __M);
936c0fe4
AI
9360}
9361
6b62f323
JJ
9362extern __inline __mmask8
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9365{
6b62f323
JJ
9366 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9367 (__v4di) __Y, 5,
9368 (__mmask8) -1);
936c0fe4
AI
9369}
9370
6b62f323
JJ
9371extern __inline __mmask8
9372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9374{
6b62f323
JJ
9375 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9376 (__v4di) __Y, 2,
9377 (__mmask8) __M);
936c0fe4
AI
9378}
9379
6b62f323
JJ
9380extern __inline __mmask8
9381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9382_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9383{
6b62f323
JJ
9384 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9385 (__v4di) __Y, 2,
9386 (__mmask8) -1);
936c0fe4
AI
9387}
9388
6b62f323
JJ
9389extern __inline __mmask8
9390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9392{
6b62f323
JJ
9393 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9394 (__v8si) __Y, 4,
9395 (__mmask8) __M);
936c0fe4
AI
9396}
9397
6b62f323
JJ
9398extern __inline __mmask8
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9401{
9402 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9403 (__v8si) __Y, 4,
9404 (__mmask8) -1);
936c0fe4
AI
9405}
9406
6b62f323
JJ
9407extern __inline __mmask8
9408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9410{
6b62f323
JJ
9411 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9412 (__v8si) __Y, 1,
9413 (__mmask8) __M);
936c0fe4
AI
9414}
9415
6b62f323
JJ
9416extern __inline __mmask8
9417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9419{
6b62f323
JJ
9420 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9421 (__v8si) __Y, 1,
9422 (__mmask8) -1);
936c0fe4
AI
9423}
9424
6b62f323
JJ
9425extern __inline __mmask8
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9428{
6b62f323
JJ
9429 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9430 (__v8si) __Y, 5,
9431 (__mmask8) __M);
936c0fe4
AI
9432}
9433
6b62f323
JJ
9434extern __inline __mmask8
9435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9437{
6b62f323
JJ
9438 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9439 (__v8si) __Y, 5,
9440 (__mmask8) -1);
936c0fe4
AI
9441}
9442
6b62f323
JJ
9443extern __inline __mmask8
9444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9446{
6b62f323
JJ
9447 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9448 (__v8si) __Y, 2,
9449 (__mmask8) __M);
936c0fe4
AI
9450}
9451
6b62f323
JJ
9452extern __inline __mmask8
9453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9454_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9455{
6b62f323
JJ
9456 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9457 (__v8si) __Y, 2,
9458 (__mmask8) -1);
936c0fe4
AI
9459}
9460
6b62f323
JJ
9461extern __inline __mmask8
9462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9464{
6b62f323
JJ
9465 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9466 (__v4di) __Y, 4,
9467 (__mmask8) __M);
936c0fe4
AI
9468}
9469
6b62f323
JJ
9470extern __inline __mmask8
9471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9472_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9473{
6b62f323
JJ
9474 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9475 (__v4di) __Y, 4,
9476 (__mmask8) -1);
936c0fe4
AI
9477}
9478
6b62f323
JJ
9479extern __inline __mmask8
9480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9481_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9482{
6b62f323
JJ
9483 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9484 (__v4di) __Y, 1,
9485 (__mmask8) __M);
936c0fe4
AI
9486}
9487
6b62f323
JJ
9488extern __inline __mmask8
9489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9490_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9491{
6b62f323
JJ
9492 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9493 (__v4di) __Y, 1,
9494 (__mmask8) -1);
936c0fe4
AI
9495}
9496
6b62f323
JJ
9497extern __inline __mmask8
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9500{
6b62f323
JJ
9501 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9502 (__v4di) __Y, 5,
9503 (__mmask8) __M);
936c0fe4
AI
9504}
9505
6b62f323
JJ
9506extern __inline __mmask8
9507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9508_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9509{
6b62f323
JJ
9510 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9511 (__v4di) __Y, 5,
9512 (__mmask8) -1);
936c0fe4
AI
9513}
9514
6b62f323
JJ
9515extern __inline __mmask8
9516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9517_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9518{
6b62f323
JJ
9519 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9520 (__v4di) __Y, 2,
9521 (__mmask8) __M);
936c0fe4
AI
9522}
9523
6b62f323
JJ
9524extern __inline __mmask8
9525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9526_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9527{
6b62f323
JJ
9528 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9529 (__v4di) __Y, 2,
9530 (__mmask8) -1);
936c0fe4
AI
9531}
9532
6b62f323
JJ
9533extern __inline __mmask8
9534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9535_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9536{
6b62f323
JJ
9537 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9538 (__v4si) __Y, 4,
9539 (__mmask8) __M);
936c0fe4
AI
9540}
9541
6b62f323
JJ
9542extern __inline __mmask8
9543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9544_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9545{
6b62f323
JJ
9546 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9547 (__v4si) __Y, 4,
9548 (__mmask8) -1);
936c0fe4
AI
9549}
9550
6b62f323
JJ
9551extern __inline __mmask8
9552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9554{
6b62f323
JJ
9555 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9556 (__v4si) __Y, 1,
9557 (__mmask8) __M);
936c0fe4
AI
9558}
9559
6b62f323
JJ
9560extern __inline __mmask8
9561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9562_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9563{
6b62f323
JJ
9564 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9565 (__v4si) __Y, 1,
9566 (__mmask8) -1);
936c0fe4
AI
9567}
9568
6b62f323
JJ
9569extern __inline __mmask8
9570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9571_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9572{
6b62f323
JJ
9573 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9574 (__v4si) __Y, 5,
9575 (__mmask8) __M);
936c0fe4
AI
9576}
9577
6b62f323
JJ
9578extern __inline __mmask8
9579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9581{
6b62f323
JJ
9582 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9583 (__v4si) __Y, 5,
9584 (__mmask8) -1);
936c0fe4
AI
9585}
9586
6b62f323
JJ
9587extern __inline __mmask8
9588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9590{
6b62f323
JJ
9591 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9592 (__v4si) __Y, 2,
9593 (__mmask8) __M);
936c0fe4
AI
9594}
9595
6b62f323
JJ
9596extern __inline __mmask8
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9599{
6b62f323
JJ
9600 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9601 (__v4si) __Y, 2,
9602 (__mmask8) -1);
936c0fe4
AI
9603}
9604
6b62f323
JJ
9605extern __inline __mmask8
9606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9607_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9608{
6b62f323
JJ
9609 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9610 (__v2di) __Y, 4,
9611 (__mmask8) __M);
936c0fe4
AI
9612}
9613
6b62f323
JJ
9614extern __inline __mmask8
9615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9616_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9617{
6b62f323
JJ
9618 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9619 (__v2di) __Y, 4,
9620 (__mmask8) -1);
936c0fe4
AI
9621}
9622
6b62f323
JJ
9623extern __inline __mmask8
9624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9625_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9626{
6b62f323
JJ
9627 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9628 (__v2di) __Y, 1,
9629 (__mmask8) __M);
936c0fe4
AI
9630}
9631
6b62f323
JJ
9632extern __inline __mmask8
9633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9635{
6b62f323
JJ
9636 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9637 (__v2di) __Y, 1,
9638 (__mmask8) -1);
936c0fe4
AI
9639}
9640
6b62f323
JJ
9641extern __inline __mmask8
9642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9643_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9644{
6b62f323
JJ
9645 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9646 (__v2di) __Y, 5,
9647 (__mmask8) __M);
936c0fe4
AI
9648}
9649
6b62f323
JJ
9650extern __inline __mmask8
9651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9653{
6b62f323
JJ
9654 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9655 (__v2di) __Y, 5,
9656 (__mmask8) -1);
936c0fe4
AI
9657}
9658
6b62f323
JJ
9659extern __inline __mmask8
9660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9661_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9662{
6b62f323
JJ
9663 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9664 (__v2di) __Y, 2,
9665 (__mmask8) __M);
936c0fe4
AI
9666}
9667
6b62f323
JJ
9668extern __inline __mmask8
9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9671{
6b62f323
JJ
9672 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9673 (__v2di) __Y, 2,
9674 (__mmask8) -1);
936c0fe4
AI
9675}
9676
6b62f323
JJ
9677extern __inline __mmask8
9678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9679_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9680{
6b62f323
JJ
9681 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9682 (__v4si) __Y, 4,
9683 (__mmask8) __M);
936c0fe4
AI
9684}
9685
6b62f323
JJ
9686extern __inline __mmask8
9687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9688_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9689{
9690 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9691 (__v4si) __Y, 4,
9692 (__mmask8) -1);
9693}
9694
9695extern __inline __mmask8
9696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9697_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9698{
9699 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9700 (__v4si) __Y, 1,
9701 (__mmask8) __M);
9702}
9703
9704extern __inline __mmask8
9705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9707{
6b62f323
JJ
9708 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9709 (__v4si) __Y, 1,
9710 (__mmask8) -1);
936c0fe4
AI
9711}
9712
6b62f323
JJ
9713extern __inline __mmask8
9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9716{
6b62f323
JJ
9717 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9718 (__v4si) __Y, 5,
9719 (__mmask8) __M);
936c0fe4
AI
9720}
9721
6b62f323
JJ
9722extern __inline __mmask8
9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9725{
6b62f323
JJ
9726 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9727 (__v4si) __Y, 5,
9728 (__mmask8) -1);
936c0fe4
AI
9729}
9730
6b62f323
JJ
9731extern __inline __mmask8
9732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9734{
6b62f323
JJ
9735 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9736 (__v4si) __Y, 2,
9737 (__mmask8) __M);
936c0fe4
AI
9738}
9739
6b62f323
JJ
9740extern __inline __mmask8
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9743{
6b62f323
JJ
9744 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9745 (__v4si) __Y, 2,
9746 (__mmask8) -1);
936c0fe4
AI
9747}
9748
6b62f323
JJ
9749extern __inline __mmask8
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9752{
6b62f323
JJ
9753 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9754 (__v2di) __Y, 4,
9755 (__mmask8) __M);
936c0fe4
AI
9756}
9757
6b62f323
JJ
9758extern __inline __mmask8
9759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9761{
6b62f323
JJ
9762 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9763 (__v2di) __Y, 4,
9764 (__mmask8) -1);
936c0fe4
AI
9765}
9766
6b62f323
JJ
9767extern __inline __mmask8
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9770{
6b62f323
JJ
9771 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9772 (__v2di) __Y, 1,
9773 (__mmask8) __M);
936c0fe4
AI
9774}
9775
6b62f323
JJ
9776extern __inline __mmask8
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9779{
6b62f323
JJ
9780 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9781 (__v2di) __Y, 1,
9782 (__mmask8) -1);
936c0fe4
AI
9783}
9784
6b62f323
JJ
9785extern __inline __mmask8
9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9788{
6b62f323
JJ
9789 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9790 (__v2di) __Y, 5,
9791 (__mmask8) __M);
936c0fe4
AI
9792}
9793
6b62f323
JJ
9794extern __inline __mmask8
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9797{
6b62f323
JJ
9798 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9799 (__v2di) __Y, 5,
9800 (__mmask8) -1);
936c0fe4
AI
9801}
9802
6b62f323
JJ
9803extern __inline __mmask8
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9806{
6b62f323
JJ
9807 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9808 (__v2di) __Y, 2,
9809 (__mmask8) __M);
936c0fe4
AI
9810}
9811
6b62f323
JJ
9812extern __inline __mmask8
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9815{
6b62f323
JJ
9816 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9817 (__v2di) __Y, 2,
9818 (__mmask8) -1);
936c0fe4
AI
9819}
9820
6b62f323 9821#ifdef __OPTIMIZE__
395a191d
SP
9822extern __inline __m256i
9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm256_permutex_epi64 (__m256i __X, const int __I)
9825{
9826 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9827 __I,
9828 (__v4di)
9829 _mm256_setzero_si256(),
9830 (__mmask8) -1);
9831}
9832
6b62f323 9833extern __inline __m256i
936c0fe4 9834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9835_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9836 __m256i __X, const int __I)
936c0fe4 9837{
6b62f323
JJ
9838 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9839 __I,
9840 (__v4di) __W,
9841 (__mmask8) __M);
936c0fe4
AI
9842}
9843
6b62f323 9844extern __inline __m256i
936c0fe4 9845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9846_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
936c0fe4 9847{
6b62f323
JJ
9848 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9849 __I,
9850 (__v4di)
9851 _mm256_setzero_si256 (),
9852 (__mmask8) __M);
936c0fe4
AI
9853}
9854
6b62f323 9855extern __inline __m256d
936c0fe4 9856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9857_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9858 __m256d __B, const int __imm)
936c0fe4 9859{
6b62f323
JJ
9860 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9861 (__v4df) __B, __imm,
9862 (__v4df) __W,
9863 (__mmask8) __U);
936c0fe4
AI
9864}
9865
6b62f323 9866extern __inline __m256d
936c0fe4 9867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9868_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9869 const int __imm)
936c0fe4 9870{
6b62f323
JJ
9871 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9872 (__v4df) __B, __imm,
9873 (__v4df)
9874 _mm256_setzero_pd (),
9875 (__mmask8) __U);
936c0fe4
AI
9876}
9877
6b62f323 9878extern __inline __m128d
936c0fe4 9879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9880_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9881 __m128d __B, const int __imm)
936c0fe4 9882{
6b62f323
JJ
9883 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9884 (__v2df) __B, __imm,
9885 (__v2df) __W,
9886 (__mmask8) __U);
936c0fe4
AI
9887}
9888
6b62f323 9889extern __inline __m128d
936c0fe4 9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9891_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9892 const int __imm)
936c0fe4 9893{
6b62f323
JJ
9894 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9895 (__v2df) __B, __imm,
9896 (__v2df)
9897 _mm_setzero_pd (),
9898 (__mmask8) __U);
936c0fe4
AI
9899}
9900
9901extern __inline __m256
9902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9903_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9904 __m256 __B, const int __imm)
936c0fe4 9905{
6b62f323
JJ
9906 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9907 (__v8sf) __B, __imm,
9908 (__v8sf) __W,
9909 (__mmask8) __U);
936c0fe4
AI
9910}
9911
6b62f323 9912extern __inline __m256
936c0fe4 9913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9914_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9915 const int __imm)
936c0fe4 9916{
6b62f323
JJ
9917 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9918 (__v8sf) __B, __imm,
9919 (__v8sf)
9920 _mm256_setzero_ps (),
9921 (__mmask8) __U);
936c0fe4
AI
9922}
9923
6b62f323 9924extern __inline __m128
936c0fe4 9925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9926_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9927 const int __imm)
936c0fe4 9928{
6b62f323
JJ
9929 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9930 (__v4sf) __B, __imm,
9931 (__v4sf) __W,
9932 (__mmask8) __U);
936c0fe4
AI
9933}
9934
6b62f323 9935extern __inline __m128
936c0fe4 9936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9937_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9938 const int __imm)
936c0fe4 9939{
6b62f323
JJ
9940 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9941 (__v4sf) __B, __imm,
9942 (__v4sf)
9943 _mm_setzero_ps (),
9944 (__mmask8) __U);
936c0fe4
AI
9945}
9946
6b62f323 9947extern __inline __m256i
936c0fe4 9948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9949_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
936c0fe4 9950{
6b62f323
JJ
9951 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9952 (__v4si) __B,
9953 __imm,
9954 (__v8si)
9955 _mm256_setzero_si256 (),
9956 (__mmask8) -1);
936c0fe4
AI
9957}
9958
6b62f323 9959extern __inline __m256i
936c0fe4 9960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9961_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9962 __m128i __B, const int __imm)
936c0fe4 9963{
6b62f323
JJ
9964 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9965 (__v4si) __B,
9966 __imm,
9967 (__v8si) __W,
9968 (__mmask8)
9969 __U);
936c0fe4
AI
9970}
9971
6b62f323 9972extern __inline __m256i
936c0fe4 9973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9974_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9975 const int __imm)
936c0fe4 9976{
6b62f323
JJ
9977 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9978 (__v4si) __B,
9979 __imm,
9980 (__v8si)
9981 _mm256_setzero_si256 (),
9982 (__mmask8)
9983 __U);
936c0fe4
AI
9984}
9985
6b62f323 9986extern __inline __m256
936c0fe4 9987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9988_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
936c0fe4 9989{
6b62f323
JJ
9990 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9991 (__v4sf) __B,
936c0fe4 9992 __imm,
6b62f323
JJ
9993 (__v8sf)
9994 _mm256_setzero_ps (),
936c0fe4
AI
9995 (__mmask8) -1);
9996}
9997
6b62f323 9998extern __inline __m256
936c0fe4 9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10000_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10001 __m128 __B, const int __imm)
936c0fe4 10002{
6b62f323
JJ
10003 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10004 (__v4sf) __B,
936c0fe4 10005 __imm,
6b62f323 10006 (__v8sf) __W,
936c0fe4
AI
10007 (__mmask8) __U);
10008}
10009
6b62f323 10010extern __inline __m256
936c0fe4 10011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10012_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10013 const int __imm)
936c0fe4 10014{
6b62f323
JJ
10015 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10016 (__v4sf) __B,
936c0fe4 10017 __imm,
6b62f323
JJ
10018 (__v8sf)
10019 _mm256_setzero_ps (),
936c0fe4
AI
10020 (__mmask8) __U);
10021}
10022
6b62f323 10023extern __inline __m128i
936c0fe4 10024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10025_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
936c0fe4 10026{
6b62f323
JJ
10027 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10028 __imm,
10029 (__v4si)
10030 _mm_setzero_si128 (),
10031 (__mmask8) -1);
936c0fe4
AI
10032}
10033
6b62f323 10034extern __inline __m128i
936c0fe4 10035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10036_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10037 const int __imm)
936c0fe4 10038{
6b62f323
JJ
10039 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10040 __imm,
10041 (__v4si) __W,
10042 (__mmask8)
10043 __U);
936c0fe4
AI
10044}
10045
6b62f323 10046extern __inline __m128i
936c0fe4 10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10048_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10049 const int __imm)
936c0fe4 10050{
6b62f323
JJ
10051 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10052 __imm,
10053 (__v4si)
10054 _mm_setzero_si128 (),
10055 (__mmask8)
10056 __U);
936c0fe4
AI
10057}
10058
10059extern __inline __m128
10060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10061_mm256_extractf32x4_ps (__m256 __A, const int __imm)
936c0fe4 10062{
6b62f323
JJ
10063 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10064 __imm,
10065 (__v4sf)
10066 _mm_setzero_ps (),
10067 (__mmask8) -1);
936c0fe4
AI
10068}
10069
10070extern __inline __m128
10071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10072_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10073 const int __imm)
936c0fe4 10074{
6b62f323
JJ
10075 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10076 __imm,
10077 (__v4sf) __W,
10078 (__mmask8)
10079 __U);
936c0fe4
AI
10080}
10081
10082extern __inline __m128
10083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10084_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10085 const int __imm)
10086{
10087 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10088 __imm,
10089 (__v4sf)
10090 _mm_setzero_ps (),
10091 (__mmask8)
10092 __U);
10093}
10094
10095extern __inline __m256i
10096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10097_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10098{
10099 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10100 (__v4di) __B,
10101 __imm,
10102 (__v4di)
10103 _mm256_setzero_si256 (),
10104 (__mmask8) -1);
10105}
10106
10107extern __inline __m256i
10108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10110 __m256i __B, const int __imm)
936c0fe4 10111{
6b62f323
JJ
10112 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10113 (__v4di) __B,
10114 __imm,
10115 (__v4di) __W,
10116 (__mmask8) __U);
936c0fe4
AI
10117}
10118
6b62f323 10119extern __inline __m256i
936c0fe4 10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10121_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10122 const int __imm)
936c0fe4 10123{
6b62f323
JJ
10124 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10125 (__v4di) __B,
10126 __imm,
10127 (__v4di)
10128 _mm256_setzero_si256 (),
10129 (__mmask8) __U);
936c0fe4
AI
10130}
10131
6b62f323 10132extern __inline __m256i
936c0fe4 10133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10134_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 10135{
6b62f323
JJ
10136 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10137 (__v8si) __B,
10138 __imm,
10139 (__v8si)
10140 _mm256_setzero_si256 (),
10141 (__mmask8) -1);
936c0fe4
AI
10142}
10143
6b62f323 10144extern __inline __m256i
936c0fe4 10145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10146_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10147 __m256i __B, const int __imm)
936c0fe4 10148{
6b62f323
JJ
10149 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10150 (__v8si) __B,
10151 __imm,
10152 (__v8si) __W,
10153 (__mmask8) __U);
936c0fe4
AI
10154}
10155
6b62f323 10156extern __inline __m256i
936c0fe4 10157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10158_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10159 const int __imm)
936c0fe4 10160{
6b62f323
JJ
10161 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10162 (__v8si) __B,
10163 __imm,
10164 (__v8si)
10165 _mm256_setzero_si256 (),
10166 (__mmask8) __U);
936c0fe4
AI
10167}
10168
6b62f323 10169extern __inline __m256d
936c0fe4 10170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10171_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
936c0fe4 10172{
6b62f323
JJ
10173 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10174 (__v4df) __B,
10175 __imm,
10176 (__v4df)
10177 _mm256_setzero_pd (),
10178 (__mmask8) -1);
936c0fe4
AI
10179}
10180
6b62f323 10181extern __inline __m256d
936c0fe4 10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10183_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10184 __m256d __B, const int __imm)
936c0fe4 10185{
6b62f323
JJ
10186 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10187 (__v4df) __B,
10188 __imm,
10189 (__v4df) __W,
10190 (__mmask8) __U);
936c0fe4
AI
10191}
10192
6b62f323 10193extern __inline __m256d
936c0fe4 10194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10195_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10196 const int __imm)
936c0fe4 10197{
6b62f323
JJ
10198 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10199 (__v4df) __B,
10200 __imm,
10201 (__v4df)
10202 _mm256_setzero_pd (),
10203 (__mmask8) __U);
936c0fe4
AI
10204}
10205
6b62f323 10206extern __inline __m256
936c0fe4 10207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10208_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
936c0fe4 10209{
6b62f323
JJ
10210 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10211 (__v8sf) __B,
10212 __imm,
10213 (__v8sf)
10214 _mm256_setzero_ps (),
10215 (__mmask8) -1);
936c0fe4
AI
10216}
10217
6b62f323 10218extern __inline __m256
936c0fe4 10219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10220_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10221 __m256 __B, const int __imm)
936c0fe4 10222{
6b62f323
JJ
10223 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10224 (__v8sf) __B,
10225 __imm,
10226 (__v8sf) __W,
10227 (__mmask8) __U);
936c0fe4
AI
10228}
10229
6b62f323 10230extern __inline __m256
936c0fe4 10231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10232_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10233 const int __imm)
936c0fe4 10234{
6b62f323
JJ
10235 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10236 (__v8sf) __B,
10237 __imm,
10238 (__v8sf)
10239 _mm256_setzero_ps (),
10240 (__mmask8) __U);
936c0fe4
AI
10241}
10242
6b62f323 10243extern __inline __m256d
936c0fe4 10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc 10245_mm256_fixupimm_pd (__m256d __A, __m256i __B,
6b62f323 10246 const int __imm)
936c0fe4 10247{
ce2ad8cc
WX
10248 return (__m256d) __builtin_ia32_fixupimmpd256 ((__v4df) __A,
10249 (__v4di) __B,
10250 __imm);
936c0fe4
AI
10251}
10252
6b62f323 10253extern __inline __m256d
936c0fe4 10254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10255_mm256_mask_fixupimm_pd (__m256d __W, __mmask8 __U, __m256d __A,
10256 __m256i __B, const int __imm)
936c0fe4 10257{
6b62f323 10258 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
ce2ad8cc 10259 (__v4di) __B,
6b62f323 10260 __imm,
ce2ad8cc 10261 (__v4df) __W,
6b62f323 10262 (__mmask8) __U);
936c0fe4
AI
10263}
10264
10265extern __inline __m256d
10266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10267_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A,
10268 __m256i __B, const int __imm)
936c0fe4 10269{
6b62f323 10270 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
ce2ad8cc 10271 (__v4di) __B,
6b62f323
JJ
10272 __imm,
10273 (__mmask8) __U);
936c0fe4
AI
10274}
10275
6b62f323 10276extern __inline __m256
936c0fe4 10277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc 10278_mm256_fixupimm_ps (__m256 __A, __m256i __B,
6b62f323 10279 const int __imm)
936c0fe4 10280{
ce2ad8cc
WX
10281 return (__m256) __builtin_ia32_fixupimmps256 ((__v8sf) __A,
10282 (__v8si) __B,
10283 __imm);
936c0fe4
AI
10284}
10285
6b62f323 10286extern __inline __m256
936c0fe4 10287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10288_mm256_mask_fixupimm_ps (__m256 __W, __mmask8 __U, __m256 __A,
10289 __m256i __B, const int __imm)
936c0fe4 10290{
6b62f323 10291 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
ce2ad8cc 10292 (__v8si) __B,
6b62f323 10293 __imm,
ce2ad8cc 10294 (__v8sf) __W,
6b62f323 10295 (__mmask8) __U);
936c0fe4
AI
10296}
10297
6b62f323 10298extern __inline __m256
936c0fe4 10299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10300_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A,
10301 __m256i __B, const int __imm)
936c0fe4 10302{
6b62f323 10303 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
ce2ad8cc 10304 (__v8si) __B,
6b62f323
JJ
10305 __imm,
10306 (__mmask8) __U);
936c0fe4
AI
10307}
10308
6b62f323 10309extern __inline __m128d
936c0fe4 10310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc 10311_mm_fixupimm_pd (__m128d __A, __m128i __B,
6b62f323 10312 const int __imm)
936c0fe4 10313{
ce2ad8cc
WX
10314 return (__m128d) __builtin_ia32_fixupimmpd128 ((__v2df) __A,
10315 (__v2di) __B,
10316 __imm);
936c0fe4
AI
10317}
10318
6b62f323 10319extern __inline __m128d
936c0fe4 10320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10321_mm_mask_fixupimm_pd (__m128d __W, __mmask8 __U, __m128d __A,
10322 __m128i __B, const int __imm)
936c0fe4 10323{
6b62f323 10324 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
ce2ad8cc 10325 (__v2di) __B,
6b62f323 10326 __imm,
ce2ad8cc 10327 (__v2df) __W,
6b62f323 10328 (__mmask8) __U);
936c0fe4
AI
10329}
10330
6b62f323 10331extern __inline __m128d
936c0fe4 10332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10333_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A,
10334 __m128i __B, const int __imm)
936c0fe4 10335{
6b62f323 10336 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
ce2ad8cc 10337 (__v2di) __B,
6b62f323
JJ
10338 __imm,
10339 (__mmask8) __U);
936c0fe4
AI
10340}
10341
6b62f323 10342extern __inline __m128
936c0fe4 10343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc 10344_mm_fixupimm_ps (__m128 __A, __m128i __B, const int __imm)
936c0fe4 10345{
ce2ad8cc
WX
10346 return (__m128) __builtin_ia32_fixupimmps128 ((__v4sf) __A,
10347 (__v4si) __B,
10348 __imm);
936c0fe4
AI
10349}
10350
6b62f323 10351extern __inline __m128
936c0fe4 10352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10353_mm_mask_fixupimm_ps (__m128 __W, __mmask8 __U, __m128 __A,
10354 __m128i __B, const int __imm)
936c0fe4 10355{
6b62f323 10356 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
ce2ad8cc 10357 (__v4si) __B,
6b62f323 10358 __imm,
ce2ad8cc 10359 (__v4sf) __W,
6b62f323 10360 (__mmask8) __U);
936c0fe4
AI
10361}
10362
6b62f323 10363extern __inline __m128
936c0fe4 10364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
ce2ad8cc
WX
10365_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A,
10366 __m128i __B, const int __imm)
936c0fe4 10367{
6b62f323 10368 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
ce2ad8cc 10369 (__v4si) __B,
6b62f323
JJ
10370 __imm,
10371 (__mmask8) __U);
936c0fe4
AI
10372}
10373
6b62f323 10374extern __inline __m256i
936c0fe4 10375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10376_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10377 const int __imm)
936c0fe4 10378{
6b62f323
JJ
10379 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10380 (__v8si) __W,
10381 (__mmask8) __U);
936c0fe4
AI
10382}
10383
6b62f323 10384extern __inline __m256i
936c0fe4 10385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10386_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10387{
6b62f323
JJ
10388 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10389 (__v8si)
10390 _mm256_setzero_si256 (),
10391 (__mmask8) __U);
936c0fe4
AI
10392}
10393
6b62f323 10394extern __inline __m128i
936c0fe4 10395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10396_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10397 const int __imm)
936c0fe4 10398{
6b62f323
JJ
10399 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10400 (__v4si) __W,
10401 (__mmask8) __U);
936c0fe4
AI
10402}
10403
6b62f323 10404extern __inline __m128i
936c0fe4 10405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10406_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10407{
6b62f323
JJ
10408 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10409 (__v4si)
10410 _mm_setzero_si128 (),
10411 (__mmask8) __U);
936c0fe4
AI
10412}
10413
6b62f323 10414extern __inline __m256i
936c0fe4 10415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10416_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10417 const int __imm)
936c0fe4 10418{
6b62f323
JJ
10419 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10420 (__v4di) __W,
10421 (__mmask8) __U);
936c0fe4
AI
10422}
10423
6b62f323 10424extern __inline __m256i
936c0fe4 10425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10426_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10427{
6b62f323
JJ
10428 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10429 (__v4di)
10430 _mm256_setzero_si256 (),
10431 (__mmask8) __U);
936c0fe4
AI
10432}
10433
6b62f323 10434extern __inline __m128i
936c0fe4 10435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10436_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10437 const int __imm)
936c0fe4 10438{
6b62f323
JJ
10439 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10440 (__v2di) __W,
10441 (__mmask8) __U);
936c0fe4
AI
10442}
10443
6b62f323 10444extern __inline __m128i
936c0fe4 10445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10446_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10447{
6b62f323
JJ
10448 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10449 (__v2di)
10450 _mm_setzero_si128 (),
10451 (__mmask8) __U);
936c0fe4
AI
10452}
10453
6b62f323 10454extern __inline __m256i
936c0fe4 10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10456_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10457 const int __imm)
936c0fe4 10458{
6b62f323
JJ
10459 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10460 (__v4di) __B,
10461 (__v4di) __C, __imm,
10462 (__mmask8) -1);
936c0fe4
AI
10463}
10464
6b62f323 10465extern __inline __m256i
936c0fe4 10466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10467_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10468 __m256i __B, __m256i __C,
10469 const int __imm)
936c0fe4 10470{
6b62f323
JJ
10471 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10472 (__v4di) __B,
10473 (__v4di) __C, __imm,
10474 (__mmask8) __U);
936c0fe4
AI
10475}
10476
6b62f323 10477extern __inline __m256i
936c0fe4 10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10479_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10480 __m256i __B, __m256i __C,
10481 const int __imm)
936c0fe4 10482{
6b62f323
JJ
10483 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10484 (__v4di) __B,
10485 (__v4di) __C,
10486 __imm,
10487 (__mmask8) __U);
936c0fe4
AI
10488}
10489
6b62f323 10490extern __inline __m256i
936c0fe4 10491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10492_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10493 const int __imm)
936c0fe4 10494{
6b62f323
JJ
10495 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10496 (__v8si) __B,
10497 (__v8si) __C, __imm,
10498 (__mmask8) -1);
936c0fe4
AI
10499}
10500
6b62f323 10501extern __inline __m256i
936c0fe4 10502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10503_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10504 __m256i __B, __m256i __C,
10505 const int __imm)
936c0fe4 10506{
6b62f323
JJ
10507 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10508 (__v8si) __B,
10509 (__v8si) __C, __imm,
10510 (__mmask8) __U);
936c0fe4
AI
10511}
10512
6b62f323 10513extern __inline __m256i
936c0fe4 10514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10515_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10516 __m256i __B, __m256i __C,
10517 const int __imm)
936c0fe4 10518{
6b62f323
JJ
10519 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10520 (__v8si) __B,
10521 (__v8si) __C,
10522 __imm,
10523 (__mmask8) __U);
936c0fe4
AI
10524}
10525
6b62f323 10526extern __inline __m128i
936c0fe4 10527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10528_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10529 const int __imm)
936c0fe4 10530{
6b62f323
JJ
10531 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10532 (__v2di) __B,
10533 (__v2di) __C, __imm,
10534 (__mmask8) -1);
936c0fe4
AI
10535}
10536
6b62f323 10537extern __inline __m128i
936c0fe4 10538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10539_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10540 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10541{
6b62f323
JJ
10542 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10543 (__v2di) __B,
10544 (__v2di) __C, __imm,
10545 (__mmask8) __U);
936c0fe4
AI
10546}
10547
6b62f323 10548extern __inline __m128i
936c0fe4 10549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10550_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10551 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10552{
6b62f323
JJ
10553 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10554 (__v2di) __B,
10555 (__v2di) __C,
10556 __imm,
10557 (__mmask8) __U);
936c0fe4
AI
10558}
10559
6b62f323 10560extern __inline __m128i
936c0fe4 10561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10562_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10563 const int __imm)
936c0fe4 10564{
6b62f323
JJ
10565 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10566 (__v4si) __B,
10567 (__v4si) __C, __imm,
10568 (__mmask8) -1);
936c0fe4
AI
10569}
10570
6b62f323 10571extern __inline __m128i
936c0fe4 10572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10573_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10574 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10575{
6b62f323
JJ
10576 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10577 (__v4si) __B,
10578 (__v4si) __C, __imm,
10579 (__mmask8) __U);
936c0fe4
AI
10580}
10581
6b62f323 10582extern __inline __m128i
936c0fe4 10583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10584_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10585 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10586{
6b62f323
JJ
10587 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10588 (__v4si) __B,
10589 (__v4si) __C,
10590 __imm,
10591 (__mmask8) __U);
936c0fe4
AI
10592}
10593
6b62f323 10594extern __inline __m256
936c0fe4 10595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10596_mm256_roundscale_ps (__m256 __A, const int __imm)
936c0fe4 10597{
6b62f323
JJ
10598 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10599 __imm,
10600 (__v8sf)
10601 _mm256_setzero_ps (),
10602 (__mmask8) -1);
936c0fe4
AI
10603}
10604
6b62f323 10605extern __inline __m256
936c0fe4 10606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10607_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10608 const int __imm)
936c0fe4 10609{
6b62f323
JJ
10610 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10611 __imm,
10612 (__v8sf) __W,
10613 (__mmask8) __U);
936c0fe4
AI
10614}
10615
6b62f323 10616extern __inline __m256
936c0fe4 10617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10618_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
936c0fe4 10619{
6b62f323
JJ
10620 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10621 __imm,
10622 (__v8sf)
10623 _mm256_setzero_ps (),
10624 (__mmask8) __U);
936c0fe4
AI
10625}
10626
6b62f323 10627extern __inline __m256d
936c0fe4 10628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10629_mm256_roundscale_pd (__m256d __A, const int __imm)
936c0fe4 10630{
6b62f323
JJ
10631 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10632 __imm,
10633 (__v4df)
10634 _mm256_setzero_pd (),
10635 (__mmask8) -1);
936c0fe4
AI
10636}
10637
6b62f323 10638extern __inline __m256d
936c0fe4 10639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10640_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10641 const int __imm)
936c0fe4 10642{
6b62f323
JJ
10643 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10644 __imm,
10645 (__v4df) __W,
10646 (__mmask8) __U);
936c0fe4
AI
10647}
10648
6b62f323 10649extern __inline __m256d
936c0fe4 10650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10651_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
936c0fe4 10652{
6b62f323
JJ
10653 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10654 __imm,
10655 (__v4df)
10656 _mm256_setzero_pd (),
10657 (__mmask8) __U);
936c0fe4
AI
10658}
10659
6b62f323 10660extern __inline __m128
936c0fe4 10661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10662_mm_roundscale_ps (__m128 __A, const int __imm)
936c0fe4 10663{
6b62f323
JJ
10664 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10665 __imm,
10666 (__v4sf)
10667 _mm_setzero_ps (),
10668 (__mmask8) -1);
936c0fe4
AI
10669}
10670
6b62f323 10671extern __inline __m128
936c0fe4 10672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10673_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10674 const int __imm)
936c0fe4 10675{
6b62f323
JJ
10676 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10677 __imm,
10678 (__v4sf) __W,
10679 (__mmask8) __U);
936c0fe4
AI
10680}
10681
6b62f323 10682extern __inline __m128
936c0fe4 10683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10684_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
936c0fe4 10685{
6b62f323
JJ
10686 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10687 __imm,
10688 (__v4sf)
10689 _mm_setzero_ps (),
10690 (__mmask8) __U);
936c0fe4
AI
10691}
10692
6b62f323 10693extern __inline __m128d
936c0fe4 10694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10695_mm_roundscale_pd (__m128d __A, const int __imm)
936c0fe4 10696{
6b62f323
JJ
10697 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10698 __imm,
10699 (__v2df)
10700 _mm_setzero_pd (),
10701 (__mmask8) -1);
936c0fe4
AI
10702}
10703
6b62f323 10704extern __inline __m128d
936c0fe4 10705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10706_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10707 const int __imm)
936c0fe4 10708{
6b62f323
JJ
10709 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10710 __imm,
10711 (__v2df) __W,
10712 (__mmask8) __U);
936c0fe4
AI
10713}
10714
6b62f323 10715extern __inline __m128d
936c0fe4 10716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10717_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
936c0fe4 10718{
6b62f323
JJ
10719 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10720 __imm,
10721 (__v2df)
10722 _mm_setzero_pd (),
10723 (__mmask8) __U);
936c0fe4
AI
10724}
10725
6b62f323 10726extern __inline __m256
936c0fe4 10727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10728_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10729 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10730{
6b62f323
JJ
10731 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10732 (__C << 2) | __B,
10733 (__v8sf)
10734 _mm256_setzero_ps (),
10735 (__mmask8) -1);
936c0fe4
AI
10736}
10737
6b62f323 10738extern __inline __m256
936c0fe4 10739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10740_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10741 _MM_MANTISSA_NORM_ENUM __B,
10742 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10743{
6b62f323
JJ
10744 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10745 (__C << 2) | __B,
10746 (__v8sf) __W,
10747 (__mmask8) __U);
936c0fe4
AI
10748}
10749
6b62f323 10750extern __inline __m256
936c0fe4 10751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10752_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10753 _MM_MANTISSA_NORM_ENUM __B,
10754 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10755{
6b62f323
JJ
10756 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10757 (__C << 2) | __B,
10758 (__v8sf)
10759 _mm256_setzero_ps (),
10760 (__mmask8) __U);
936c0fe4
AI
10761}
10762
6b62f323 10763extern __inline __m128
936c0fe4 10764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10765_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10766 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10767{
6b62f323
JJ
10768 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10769 (__C << 2) | __B,
10770 (__v4sf)
10771 _mm_setzero_ps (),
10772 (__mmask8) -1);
936c0fe4
AI
10773}
10774
6b62f323 10775extern __inline __m128
936c0fe4 10776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10777_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10778 _MM_MANTISSA_NORM_ENUM __B,
10779 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10780{
6b62f323
JJ
10781 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10782 (__C << 2) | __B,
10783 (__v4sf) __W,
10784 (__mmask8) __U);
936c0fe4
AI
10785}
10786
6b62f323 10787extern __inline __m128
936c0fe4 10788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10789_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10790 _MM_MANTISSA_NORM_ENUM __B,
10791 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10792{
6b62f323
JJ
10793 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10794 (__C << 2) | __B,
10795 (__v4sf)
10796 _mm_setzero_ps (),
10797 (__mmask8) __U);
936c0fe4
AI
10798}
10799
6b62f323 10800extern __inline __m256d
936c0fe4 10801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10802_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10803 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10804{
6b62f323
JJ
10805 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10806 (__C << 2) | __B,
10807 (__v4df)
10808 _mm256_setzero_pd (),
10809 (__mmask8) -1);
936c0fe4
AI
10810}
10811
6b62f323 10812extern __inline __m256d
936c0fe4 10813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10814_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10815 _MM_MANTISSA_NORM_ENUM __B,
10816 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10817{
6b62f323
JJ
10818 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10819 (__C << 2) | __B,
10820 (__v4df) __W,
10821 (__mmask8) __U);
936c0fe4
AI
10822}
10823
6b62f323 10824extern __inline __m256d
936c0fe4 10825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10826_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10827 _MM_MANTISSA_NORM_ENUM __B,
10828 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10829{
6b62f323
JJ
10830 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10831 (__C << 2) | __B,
10832 (__v4df)
10833 _mm256_setzero_pd (),
10834 (__mmask8) __U);
936c0fe4
AI
10835}
10836
6b62f323 10837extern __inline __m128d
936c0fe4 10838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10839_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10840 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10841{
6b62f323
JJ
10842 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10843 (__C << 2) | __B,
10844 (__v2df)
10845 _mm_setzero_pd (),
10846 (__mmask8) -1);
936c0fe4
AI
10847}
10848
6b62f323 10849extern __inline __m128d
936c0fe4 10850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10851_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10852 _MM_MANTISSA_NORM_ENUM __B,
10853 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10854{
6b62f323
JJ
10855 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10856 (__C << 2) | __B,
10857 (__v2df) __W,
10858 (__mmask8) __U);
936c0fe4
AI
10859}
10860
6b62f323 10861extern __inline __m128d
936c0fe4 10862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10863_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10864 _MM_MANTISSA_NORM_ENUM __B,
10865 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10866{
6b62f323
JJ
10867 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10868 (__C << 2) | __B,
10869 (__v2df)
10870 _mm_setzero_pd (),
10871 (__mmask8) __U);
936c0fe4
AI
10872}
10873
6b62f323 10874extern __inline __m256
936c0fe4 10875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10876_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10877 __m256i __index, void const *__addr,
10878 int __scale)
936c0fe4 10879{
6b62f323
JJ
10880 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10881 __addr,
10882 (__v8si) __index,
10883 __mask, __scale);
936c0fe4
AI
10884}
10885
6b62f323 10886extern __inline __m128
936c0fe4 10887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10888_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10889 __m128i __index, void const *__addr,
10890 int __scale)
936c0fe4 10891{
6b62f323
JJ
10892 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10893 __addr,
10894 (__v4si) __index,
10895 __mask, __scale);
936c0fe4
AI
10896}
10897
6b62f323 10898extern __inline __m256d
936c0fe4 10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10900_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10901 __m128i __index, void const *__addr,
10902 int __scale)
936c0fe4 10903{
6b62f323
JJ
10904 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10905 __addr,
10906 (__v4si) __index,
10907 __mask, __scale);
936c0fe4
AI
10908}
10909
6b62f323 10910extern __inline __m128d
936c0fe4 10911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10912_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10913 __m128i __index, void const *__addr,
10914 int __scale)
936c0fe4 10915{
6b62f323
JJ
10916 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10917 __addr,
10918 (__v4si) __index,
10919 __mask, __scale);
936c0fe4
AI
10920}
10921
6b62f323 10922extern __inline __m128
936c0fe4 10923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10924_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10925 __m256i __index, void const *__addr,
10926 int __scale)
10927{
10928 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10929 __addr,
10930 (__v4di) __index,
10931 __mask, __scale);
936c0fe4
AI
10932}
10933
6b62f323 10934extern __inline __m128
936c0fe4 10935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10936_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10937 __m128i __index, void const *__addr,
10938 int __scale)
936c0fe4 10939{
6b62f323
JJ
10940 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10941 __addr,
10942 (__v2di) __index,
10943 __mask, __scale);
936c0fe4
AI
10944}
10945
6b62f323 10946extern __inline __m256d
936c0fe4 10947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10948_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10949 __m256i __index, void const *__addr,
10950 int __scale)
936c0fe4 10951{
6b62f323
JJ
10952 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10953 __addr,
10954 (__v4di) __index,
10955 __mask, __scale);
936c0fe4
AI
10956}
10957
6b62f323 10958extern __inline __m128d
936c0fe4 10959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10960_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10961 __m128i __index, void const *__addr,
10962 int __scale)
936c0fe4 10963{
6b62f323
JJ
10964 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10965 __addr,
10966 (__v2di) __index,
10967 __mask, __scale);
936c0fe4
AI
10968}
10969
6b62f323 10970extern __inline __m256i
936c0fe4 10971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10972_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10973 __m256i __index, void const *__addr,
10974 int __scale)
936c0fe4 10975{
6b62f323
JJ
10976 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10977 __addr,
10978 (__v8si) __index,
10979 __mask, __scale);
936c0fe4
AI
10980}
10981
10982extern __inline __m128i
10983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10984_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10985 __m128i __index, void const *__addr,
10986 int __scale)
936c0fe4 10987{
6b62f323
JJ
10988 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10989 __addr,
10990 (__v4si) __index,
10991 __mask, __scale);
936c0fe4
AI
10992}
10993
10994extern __inline __m256i
10995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10996_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10997 __m128i __index, void const *__addr,
10998 int __scale)
936c0fe4 10999{
6b62f323
JJ
11000 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11001 __addr,
11002 (__v4si) __index,
11003 __mask, __scale);
936c0fe4
AI
11004}
11005
6b62f323 11006extern __inline __m128i
936c0fe4 11007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11008_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11009 __m128i __index, void const *__addr,
11010 int __scale)
936c0fe4 11011{
6b62f323
JJ
11012 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11013 __addr,
11014 (__v4si) __index,
11015 __mask, __scale);
936c0fe4
AI
11016}
11017
6b62f323 11018extern __inline __m128i
936c0fe4 11019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11020_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11021 __m256i __index, void const *__addr,
11022 int __scale)
936c0fe4 11023{
6b62f323
JJ
11024 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11025 __addr,
11026 (__v4di) __index,
11027 __mask, __scale);
936c0fe4
AI
11028}
11029
11030extern __inline __m128i
11031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11032_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11033 __m128i __index, void const *__addr,
11034 int __scale)
936c0fe4 11035{
6b62f323
JJ
11036 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11037 __addr,
11038 (__v2di) __index,
11039 __mask, __scale);
936c0fe4
AI
11040}
11041
6b62f323 11042extern __inline __m256i
936c0fe4 11043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11044_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11045 __m256i __index, void const *__addr,
11046 int __scale)
936c0fe4 11047{
6b62f323
JJ
11048 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11049 __addr,
11050 (__v4di) __index,
11051 __mask, __scale);
936c0fe4
AI
11052}
11053
11054extern __inline __m128i
11055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11056_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11057 __m128i __index, void const *__addr,
11058 int __scale)
936c0fe4 11059{
6b62f323
JJ
11060 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11061 __addr,
11062 (__v2di) __index,
11063 __mask, __scale);
936c0fe4
AI
11064}
11065
6b62f323 11066extern __inline void
936c0fe4 11067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11068_mm256_i32scatter_ps (void *__addr, __m256i __index,
11069 __m256 __v1, const int __scale)
936c0fe4 11070{
6b62f323
JJ
11071 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11072 (__v8si) __index, (__v8sf) __v1,
11073 __scale);
936c0fe4
AI
11074}
11075
6b62f323 11076extern __inline void
936c0fe4 11077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11078_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11079 __m256i __index, __m256 __v1,
11080 const int __scale)
936c0fe4 11081{
6b62f323
JJ
11082 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11083 (__v8sf) __v1, __scale);
936c0fe4
AI
11084}
11085
6b62f323 11086extern __inline void
936c0fe4 11087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11088_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11089 const int __scale)
936c0fe4 11090{
6b62f323
JJ
11091 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11092 (__v4si) __index, (__v4sf) __v1,
11093 __scale);
936c0fe4
AI
11094}
11095
6b62f323 11096extern __inline void
936c0fe4 11097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11098_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11099 __m128i __index, __m128 __v1,
11100 const int __scale)
936c0fe4 11101{
6b62f323
JJ
11102 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11103 (__v4sf) __v1, __scale);
936c0fe4
AI
11104}
11105
6b62f323 11106extern __inline void
936c0fe4 11107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11108_mm256_i32scatter_pd (void *__addr, __m128i __index,
11109 __m256d __v1, const int __scale)
936c0fe4 11110{
6b62f323
JJ
11111 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11112 (__v4si) __index, (__v4df) __v1,
11113 __scale);
936c0fe4
AI
11114}
11115
6b62f323 11116extern __inline void
936c0fe4 11117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11118_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11119 __m128i __index, __m256d __v1,
11120 const int __scale)
936c0fe4 11121{
6b62f323
JJ
11122 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11123 (__v4df) __v1, __scale);
936c0fe4
AI
11124}
11125
6b62f323 11126extern __inline void
936c0fe4 11127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11128_mm_i32scatter_pd (void *__addr, __m128i __index,
11129 __m128d __v1, const int __scale)
936c0fe4 11130{
6b62f323
JJ
11131 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11132 (__v4si) __index, (__v2df) __v1,
11133 __scale);
936c0fe4
AI
11134}
11135
6b62f323 11136extern __inline void
936c0fe4 11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11138_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11139 __m128i __index, __m128d __v1,
11140 const int __scale)
936c0fe4 11141{
6b62f323
JJ
11142 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11143 (__v2df) __v1, __scale);
936c0fe4
AI
11144}
11145
6b62f323 11146extern __inline void
936c0fe4 11147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11148_mm256_i64scatter_ps (void *__addr, __m256i __index,
11149 __m128 __v1, const int __scale)
936c0fe4 11150{
6b62f323
JJ
11151 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11152 (__v4di) __index, (__v4sf) __v1,
11153 __scale);
936c0fe4
AI
11154}
11155
6b62f323 11156extern __inline void
936c0fe4 11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11158_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11159 __m256i __index, __m128 __v1,
11160 const int __scale)
936c0fe4 11161{
6b62f323
JJ
11162 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11163 (__v4sf) __v1, __scale);
936c0fe4
AI
11164}
11165
6b62f323 11166extern __inline void
936c0fe4 11167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11168_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11169 const int __scale)
936c0fe4 11170{
6b62f323
JJ
11171 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11172 (__v2di) __index, (__v4sf) __v1,
11173 __scale);
936c0fe4
AI
11174}
11175
6b62f323 11176extern __inline void
936c0fe4 11177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11178_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11179 __m128i __index, __m128 __v1,
11180 const int __scale)
936c0fe4 11181{
6b62f323
JJ
11182 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11183 (__v4sf) __v1, __scale);
936c0fe4
AI
11184}
11185
6b62f323 11186extern __inline void
936c0fe4 11187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11188_mm256_i64scatter_pd (void *__addr, __m256i __index,
11189 __m256d __v1, const int __scale)
936c0fe4 11190{
6b62f323
JJ
11191 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11192 (__v4di) __index, (__v4df) __v1,
11193 __scale);
936c0fe4
AI
11194}
11195
6b62f323 11196extern __inline void
936c0fe4 11197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11198_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11199 __m256i __index, __m256d __v1,
11200 const int __scale)
936c0fe4 11201{
6b62f323
JJ
11202 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11203 (__v4df) __v1, __scale);
936c0fe4
AI
11204}
11205
6b62f323 11206extern __inline void
936c0fe4 11207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11208_mm_i64scatter_pd (void *__addr, __m128i __index,
11209 __m128d __v1, const int __scale)
936c0fe4 11210{
6b62f323
JJ
11211 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11212 (__v2di) __index, (__v2df) __v1,
11213 __scale);
936c0fe4
AI
11214}
11215
6b62f323 11216extern __inline void
936c0fe4 11217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11218_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11219 __m128i __index, __m128d __v1,
11220 const int __scale)
936c0fe4 11221{
6b62f323
JJ
11222 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11223 (__v2df) __v1, __scale);
936c0fe4
AI
11224}
11225
6b62f323 11226extern __inline void
936c0fe4 11227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11228_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11229 __m256i __v1, const int __scale)
936c0fe4 11230{
6b62f323
JJ
11231 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11232 (__v8si) __index, (__v8si) __v1,
11233 __scale);
936c0fe4
AI
11234}
11235
6b62f323 11236extern __inline void
936c0fe4 11237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11238_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11239 __m256i __index, __m256i __v1,
11240 const int __scale)
936c0fe4 11241{
6b62f323
JJ
11242 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11243 (__v8si) __v1, __scale);
936c0fe4
AI
11244}
11245
6b62f323 11246extern __inline void
936c0fe4 11247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11248_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11249 __m128i __v1, const int __scale)
936c0fe4 11250{
6b62f323
JJ
11251 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11252 (__v4si) __index, (__v4si) __v1,
11253 __scale);
936c0fe4
AI
11254}
11255
6b62f323 11256extern __inline void
936c0fe4 11257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11258_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11259 __m128i __index, __m128i __v1,
11260 const int __scale)
936c0fe4 11261{
6b62f323
JJ
11262 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11263 (__v4si) __v1, __scale);
936c0fe4
AI
11264}
11265
6b62f323 11266extern __inline void
936c0fe4 11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11268_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11269 __m256i __v1, const int __scale)
936c0fe4 11270{
6b62f323
JJ
11271 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11272 (__v4si) __index, (__v4di) __v1,
11273 __scale);
936c0fe4
AI
11274}
11275
6b62f323 11276extern __inline void
936c0fe4 11277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11278_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11279 __m128i __index, __m256i __v1,
11280 const int __scale)
936c0fe4 11281{
6b62f323
JJ
11282 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11283 (__v4di) __v1, __scale);
936c0fe4
AI
11284}
11285
6b62f323 11286extern __inline void
936c0fe4 11287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11288_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11289 __m128i __v1, const int __scale)
936c0fe4 11290{
6b62f323
JJ
11291 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11292 (__v4si) __index, (__v2di) __v1,
11293 __scale);
936c0fe4
AI
11294}
11295
6b62f323 11296extern __inline void
936c0fe4 11297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11298_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11299 __m128i __index, __m128i __v1,
11300 const int __scale)
936c0fe4 11301{
6b62f323
JJ
11302 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11303 (__v2di) __v1, __scale);
936c0fe4
AI
11304}
11305
6b62f323 11306extern __inline void
936c0fe4 11307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11308_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11309 __m128i __v1, const int __scale)
936c0fe4 11310{
6b62f323
JJ
11311 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11312 (__v4di) __index, (__v4si) __v1,
11313 __scale);
936c0fe4
AI
11314}
11315
6b62f323 11316extern __inline void
936c0fe4 11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11318_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11319 __m256i __index, __m128i __v1,
11320 const int __scale)
936c0fe4 11321{
6b62f323
JJ
11322 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11323 (__v4si) __v1, __scale);
936c0fe4
AI
11324}
11325
6b62f323 11326extern __inline void
936c0fe4 11327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11328_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11329 __m128i __v1, const int __scale)
936c0fe4 11330{
6b62f323
JJ
11331 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11332 (__v2di) __index, (__v4si) __v1,
11333 __scale);
936c0fe4
AI
11334}
11335
6b62f323 11336extern __inline void
936c0fe4 11337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11338_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11339 __m128i __index, __m128i __v1,
11340 const int __scale)
936c0fe4 11341{
6b62f323
JJ
11342 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11343 (__v4si) __v1, __scale);
936c0fe4
AI
11344}
11345
6b62f323 11346extern __inline void
936c0fe4 11347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11348_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11349 __m256i __v1, const int __scale)
936c0fe4 11350{
6b62f323
JJ
11351 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11352 (__v4di) __index, (__v4di) __v1,
11353 __scale);
936c0fe4
AI
11354}
11355
6b62f323 11356extern __inline void
936c0fe4 11357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11358_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11359 __m256i __index, __m256i __v1,
11360 const int __scale)
936c0fe4 11361{
6b62f323
JJ
11362 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11363 (__v4di) __v1, __scale);
936c0fe4
AI
11364}
11365
6b62f323 11366extern __inline void
936c0fe4 11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11368_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11369 __m128i __v1, const int __scale)
936c0fe4 11370{
6b62f323
JJ
11371 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11372 (__v2di) __index, (__v2di) __v1,
11373 __scale);
936c0fe4
AI
11374}
11375
6b62f323 11376extern __inline void
936c0fe4 11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11378_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11379 __m128i __index, __m128i __v1,
11380 const int __scale)
936c0fe4 11381{
6b62f323
JJ
11382 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11383 (__v2di) __v1, __scale);
936c0fe4
AI
11384}
11385
11386extern __inline __m256i
11387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11388_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11389 _MM_PERM_ENUM __mask)
936c0fe4 11390{
6b62f323
JJ
11391 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11392 (__v8si) __W,
936c0fe4
AI
11393 (__mmask8) __U);
11394}
11395
11396extern __inline __m256i
11397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11398_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11399 _MM_PERM_ENUM __mask)
936c0fe4 11400{
6b62f323
JJ
11401 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11402 (__v8si)
936c0fe4
AI
11403 _mm256_setzero_si256 (),
11404 (__mmask8) __U);
11405}
11406
6b62f323 11407extern __inline __m128i
936c0fe4 11408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11409_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11410 _MM_PERM_ENUM __mask)
936c0fe4 11411{
6b62f323
JJ
11412 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11413 (__v4si) __W,
936c0fe4
AI
11414 (__mmask8) __U);
11415}
11416
6b62f323 11417extern __inline __m128i
936c0fe4 11418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11419_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11420 _MM_PERM_ENUM __mask)
936c0fe4 11421{
6b62f323
JJ
11422 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11423 (__v4si)
11424 _mm_setzero_si128 (),
936c0fe4
AI
11425 (__mmask8) __U);
11426}
11427
6b62f323 11428extern __inline __m256i
936c0fe4 11429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11430_mm256_rol_epi32 (__m256i __A, const int __B)
936c0fe4 11431{
6b62f323
JJ
11432 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11433 (__v8si)
11434 _mm256_setzero_si256 (),
11435 (__mmask8) -1);
936c0fe4
AI
11436}
11437
6b62f323 11438extern __inline __m256i
936c0fe4 11439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11440_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11441 const int __B)
936c0fe4 11442{
6b62f323
JJ
11443 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11444 (__v8si) __W,
11445 (__mmask8) __U);
936c0fe4
AI
11446}
11447
6b62f323 11448extern __inline __m256i
936c0fe4 11449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11450_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11451{
6b62f323
JJ
11452 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11453 (__v8si)
11454 _mm256_setzero_si256 (),
11455 (__mmask8) __U);
936c0fe4
AI
11456}
11457
6b62f323 11458extern __inline __m128i
936c0fe4 11459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11460_mm_rol_epi32 (__m128i __A, const int __B)
936c0fe4 11461{
6b62f323
JJ
11462 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11463 (__v4si)
11464 _mm_setzero_si128 (),
11465 (__mmask8) -1);
936c0fe4
AI
11466}
11467
6b62f323 11468extern __inline __m128i
936c0fe4 11469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11470_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11471 const int __B)
936c0fe4 11472{
6b62f323
JJ
11473 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11474 (__v4si) __W,
936c0fe4
AI
11475 (__mmask8) __U);
11476}
11477
6b62f323 11478extern __inline __m128i
936c0fe4 11479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11480_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11481{
6b62f323
JJ
11482 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11483 (__v4si)
11484 _mm_setzero_si128 (),
936c0fe4
AI
11485 (__mmask8) __U);
11486}
11487
6b62f323 11488extern __inline __m256i
936c0fe4 11489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11490_mm256_ror_epi32 (__m256i __A, const int __B)
936c0fe4 11491{
6b62f323
JJ
11492 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11493 (__v8si)
11494 _mm256_setzero_si256 (),
11495 (__mmask8) -1);
936c0fe4
AI
11496}
11497
6b62f323 11498extern __inline __m256i
936c0fe4 11499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11500_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11501 const int __B)
936c0fe4 11502{
6b62f323
JJ
11503 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11504 (__v8si) __W,
11505 (__mmask8) __U);
936c0fe4
AI
11506}
11507
11508extern __inline __m256i
11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11510_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11511{
6b62f323
JJ
11512 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11513 (__v8si)
11514 _mm256_setzero_si256 (),
11515 (__mmask8) __U);
936c0fe4
AI
11516}
11517
6b62f323 11518extern __inline __m128i
936c0fe4 11519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11520_mm_ror_epi32 (__m128i __A, const int __B)
936c0fe4 11521{
6b62f323
JJ
11522 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11523 (__v4si)
11524 _mm_setzero_si128 (),
11525 (__mmask8) -1);
936c0fe4
AI
11526}
11527
6b62f323 11528extern __inline __m128i
936c0fe4 11529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11530_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11531 const int __B)
936c0fe4 11532{
6b62f323
JJ
11533 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11534 (__v4si) __W,
11535 (__mmask8) __U);
936c0fe4
AI
11536}
11537
6b62f323 11538extern __inline __m128i
936c0fe4 11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11540_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11541{
6b62f323
JJ
11542 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11543 (__v4si)
11544 _mm_setzero_si128 (),
11545 (__mmask8) __U);
936c0fe4
AI
11546}
11547
6b62f323 11548extern __inline __m256i
936c0fe4 11549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11550_mm256_rol_epi64 (__m256i __A, const int __B)
936c0fe4 11551{
6b62f323
JJ
11552 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11553 (__v4di)
11554 _mm256_setzero_si256 (),
11555 (__mmask8) -1);
936c0fe4
AI
11556}
11557
6b62f323 11558extern __inline __m256i
936c0fe4 11559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11560_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11561 const int __B)
936c0fe4 11562{
6b62f323
JJ
11563 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11564 (__v4di) __W,
11565 (__mmask8) __U);
936c0fe4
AI
11566}
11567
6b62f323 11568extern __inline __m256i
936c0fe4 11569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11570_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11571{
11572 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11573 (__v4di)
11574 _mm256_setzero_si256 (),
11575 (__mmask8) __U);
936c0fe4
AI
11576}
11577
6b62f323 11578extern __inline __m128i
936c0fe4 11579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11580_mm_rol_epi64 (__m128i __A, const int __B)
936c0fe4 11581{
6b62f323
JJ
11582 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11583 (__v2di)
11584 _mm_setzero_si128 (),
936c0fe4
AI
11585 (__mmask8) -1);
11586}
11587
6b62f323 11588extern __inline __m128i
936c0fe4 11589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11590_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11591 const int __B)
936c0fe4 11592{
6b62f323
JJ
11593 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11594 (__v2di) __W,
11595 (__mmask8) __U);
936c0fe4
AI
11596}
11597
6b62f323 11598extern __inline __m128i
936c0fe4 11599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11600_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11601{
6b62f323
JJ
11602 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11603 (__v2di)
11604 _mm_setzero_si128 (),
11605 (__mmask8) __U);
936c0fe4
AI
11606}
11607
6b62f323 11608extern __inline __m256i
936c0fe4 11609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11610_mm256_ror_epi64 (__m256i __A, const int __B)
936c0fe4 11611{
6b62f323
JJ
11612 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11613 (__v4di)
11614 _mm256_setzero_si256 (),
11615 (__mmask8) -1);
936c0fe4
AI
11616}
11617
6b62f323 11618extern __inline __m256i
936c0fe4 11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11620_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11621 const int __B)
936c0fe4 11622{
6b62f323
JJ
11623 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11624 (__v4di) __W,
11625 (__mmask8) __U);
936c0fe4
AI
11626}
11627
6b62f323 11628extern __inline __m256i
936c0fe4 11629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11630_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11631{
6b62f323
JJ
11632 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11633 (__v4di)
11634 _mm256_setzero_si256 (),
936c0fe4
AI
11635 (__mmask8) __U);
11636}
11637
6b62f323 11638extern __inline __m128i
936c0fe4 11639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11640_mm_ror_epi64 (__m128i __A, const int __B)
936c0fe4 11641{
6b62f323
JJ
11642 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11643 (__v2di)
11644 _mm_setzero_si128 (),
11645 (__mmask8) -1);
936c0fe4
AI
11646}
11647
6b62f323 11648extern __inline __m128i
936c0fe4 11649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11650_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11651 const int __B)
936c0fe4 11652{
6b62f323
JJ
11653 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11654 (__v2di) __W,
11655 (__mmask8) __U);
936c0fe4
AI
11656}
11657
6b62f323 11658extern __inline __m128i
936c0fe4 11659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11660_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11661{
6b62f323
JJ
11662 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11663 (__v2di)
11664 _mm_setzero_si128 (),
11665 (__mmask8) __U);
936c0fe4
AI
11666}
11667
6b62f323 11668extern __inline __m128i
936c0fe4 11669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11670_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11671{
6b62f323
JJ
11672 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11673 (__v4si) __B, __imm,
11674 (__v4si)
11675 _mm_setzero_si128 (),
11676 (__mmask8) -1);
936c0fe4
AI
11677}
11678
6b62f323 11679extern __inline __m128i
936c0fe4 11680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11681_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11682 __m128i __B, const int __imm)
936c0fe4 11683{
6b62f323
JJ
11684 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11685 (__v4si) __B, __imm,
11686 (__v4si) __W,
936c0fe4
AI
11687 (__mmask8) __U);
11688}
11689
6b62f323 11690extern __inline __m128i
936c0fe4 11691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11692_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11693 const int __imm)
936c0fe4 11694{
6b62f323
JJ
11695 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11696 (__v4si) __B, __imm,
11697 (__v4si)
11698 _mm_setzero_si128 (),
11699 (__mmask8) __U);
936c0fe4
AI
11700}
11701
6b62f323 11702extern __inline __m128i
936c0fe4 11703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11704_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11705{
6b62f323
JJ
11706 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11707 (__v2di) __B, __imm,
11708 (__v2di)
11709 _mm_setzero_si128 (),
11710 (__mmask8) -1);
936c0fe4
AI
11711}
11712
6b62f323 11713extern __inline __m128i
936c0fe4 11714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11715_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11716 __m128i __B, const int __imm)
936c0fe4 11717{
6b62f323
JJ
11718 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11719 (__v2di) __B, __imm,
11720 (__v2di) __W,
11721 (__mmask8) __U);
936c0fe4
AI
11722}
11723
6b62f323 11724extern __inline __m128i
936c0fe4 11725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11726_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11727 const int __imm)
936c0fe4 11728{
6b62f323
JJ
11729 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11730 (__v2di) __B, __imm,
11731 (__v2di)
11732 _mm_setzero_si128 (),
11733 (__mmask8) __U);
936c0fe4
AI
11734}
11735
6b62f323 11736extern __inline __m256i
936c0fe4 11737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11738_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11739{
6b62f323
JJ
11740 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11741 (__v8si) __B, __imm,
11742 (__v8si)
11743 _mm256_setzero_si256 (),
936c0fe4
AI
11744 (__mmask8) -1);
11745}
11746
6b62f323 11747extern __inline __m256i
936c0fe4 11748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11749_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11750 __m256i __B, const int __imm)
936c0fe4 11751{
6b62f323
JJ
11752 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11753 (__v8si) __B, __imm,
11754 (__v8si) __W,
11755 (__mmask8) __U);
936c0fe4
AI
11756}
11757
6b62f323 11758extern __inline __m256i
936c0fe4 11759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11760_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11761 const int __imm)
936c0fe4 11762{
6b62f323
JJ
11763 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11764 (__v8si) __B, __imm,
11765 (__v8si)
11766 _mm256_setzero_si256 (),
11767 (__mmask8) __U);
936c0fe4
AI
11768}
11769
6b62f323 11770extern __inline __m256i
936c0fe4 11771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11772_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11773{
6b62f323
JJ
11774 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11775 (__v4di) __B, __imm,
11776 (__v4di)
11777 _mm256_setzero_si256 (),
11778 (__mmask8) -1);
936c0fe4
AI
11779}
11780
6b62f323 11781extern __inline __m256i
936c0fe4 11782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11783_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11784 __m256i __B, const int __imm)
936c0fe4 11785{
6b62f323
JJ
11786 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11787 (__v4di) __B, __imm,
11788 (__v4di) __W,
936c0fe4
AI
11789 (__mmask8) __U);
11790}
11791
6b62f323 11792extern __inline __m256i
936c0fe4 11793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11794_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11795 const int __imm)
936c0fe4 11796{
6b62f323
JJ
11797 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11798 (__v4di) __B, __imm,
11799 (__v4di)
11800 _mm256_setzero_si256 (),
936c0fe4
AI
11801 (__mmask8) __U);
11802}
11803
6b62f323 11804extern __inline __m128i
936c0fe4 11805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11806_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11807 const int __I)
936c0fe4 11808{
6b62f323
JJ
11809 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11810 (__v8hi) __W,
936c0fe4
AI
11811 (__mmask8) __U);
11812}
11813
6b62f323 11814extern __inline __m128i
936c0fe4 11815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11816_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
936c0fe4 11817{
6b62f323
JJ
11818 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11819 (__v8hi)
11820 _mm_setzero_si128 (),
936c0fe4
AI
11821 (__mmask8) __U);
11822}
11823
6b62f323 11824extern __inline __m128i
936c0fe4 11825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11826_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11827 const int __I)
936c0fe4 11828{
6b62f323
JJ
11829 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11830 (__v8hi) __W,
11831 (__mmask8) __U);
936c0fe4
AI
11832}
11833
6b62f323
JJ
11834extern __inline __m128i
11835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11836_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
eee5d6f5 11837{
6b62f323
JJ
11838 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11839 (__v8hi)
11840 _mm_setzero_si128 (),
11841 (__mmask8) __U);
eee5d6f5
AI
11842}
11843
6b62f323
JJ
11844extern __inline __m256i
11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11847 const int __imm)
936c0fe4 11848{
6b62f323
JJ
11849 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11850 (__v8si) __W,
11851 (__mmask8) __U);
936c0fe4
AI
11852}
11853
6b62f323
JJ
11854extern __inline __m256i
11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11857{
6b62f323
JJ
11858 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11859 (__v8si)
11860 _mm256_setzero_si256 (),
11861 (__mmask8) __U);
eee5d6f5
AI
11862}
11863
6b62f323
JJ
11864extern __inline __m128i
11865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11867 const int __imm)
936c0fe4 11868{
6b62f323
JJ
11869 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11870 (__v4si) __W,
11871 (__mmask8) __U);
936c0fe4
AI
11872}
11873
6b62f323
JJ
11874extern __inline __m128i
11875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11876_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11877{
6b62f323
JJ
11878 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11879 (__v4si)
11880 _mm_setzero_si128 (),
11881 (__mmask8) __U);
eee5d6f5
AI
11882}
11883
6b62f323
JJ
11884extern __inline __m256i
11885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886_mm256_srai_epi64 (__m256i __A, const int __imm)
936c0fe4 11887{
6b62f323
JJ
11888 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11889 (__v4di)
11890 _mm256_setzero_si256 (),
c42b0bdf 11891 (__mmask8) -1);
936c0fe4
AI
11892}
11893
6b62f323
JJ
11894extern __inline __m256i
11895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11897 const int __imm)
936c0fe4 11898{
6b62f323
JJ
11899 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11900 (__v4di) __W,
11901 (__mmask8) __U);
936c0fe4
AI
11902}
11903
6b62f323
JJ
11904extern __inline __m256i
11905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11906_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11907{
6b62f323
JJ
11908 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11909 (__v4di)
11910 _mm256_setzero_si256 (),
11911 (__mmask8) __U);
eee5d6f5
AI
11912}
11913
6b62f323
JJ
11914extern __inline __m128i
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm_srai_epi64 (__m128i __A, const int __imm)
936c0fe4 11917{
6b62f323
JJ
11918 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11919 (__v2di)
11920 _mm_setzero_si128 (),
c42b0bdf 11921 (__mmask8) -1);
936c0fe4
AI
11922}
11923
6b62f323
JJ
11924extern __inline __m128i
11925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11927 const int __imm)
936c0fe4 11928{
6b62f323
JJ
11929 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11930 (__v2di) __W,
11931 (__mmask8) __U);
936c0fe4
AI
11932}
11933
6b62f323
JJ
11934extern __inline __m128i
11935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11936_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11937{
6b62f323
JJ
11938 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11939 (__v2di)
11940 _mm_setzero_si128 (),
11941 (__mmask8) __U);
eee5d6f5
AI
11942}
11943
6b62f323
JJ
11944extern __inline __m128i
11945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11947{
6b62f323
JJ
11948 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11949 (__v4si) __W,
11950 (__mmask8) __U);
936c0fe4
AI
11951}
11952
6b62f323
JJ
11953extern __inline __m128i
11954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11956{
6b62f323
JJ
11957 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11958 (__v4si)
11959 _mm_setzero_si128 (),
11960 (__mmask8) __U);
eee5d6f5
AI
11961}
11962
6b62f323
JJ
11963extern __inline __m128i
11964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11965_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11966{
6b62f323
JJ
11967 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11968 (__v2di) __W,
11969 (__mmask8) __U);
936c0fe4
AI
11970}
11971
6b62f323
JJ
11972extern __inline __m128i
11973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11975{
6b62f323
JJ
11976 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11977 (__v2di)
11978 _mm_setzero_si128 (),
11979 (__mmask8) __U);
eee5d6f5
AI
11980}
11981
6b62f323
JJ
11982extern __inline __m256i
11983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11984_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11985 int __B)
936c0fe4 11986{
6b62f323
JJ
11987 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11988 (__v8si) __W,
11989 (__mmask8) __U);
936c0fe4
AI
11990}
11991
6b62f323
JJ
11992extern __inline __m256i
11993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11994_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11995{
11996 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11997 (__v8si)
11998 _mm256_setzero_si256 (),
11999 (__mmask8) __U);
eee5d6f5
AI
12000}
12001
6b62f323
JJ
12002extern __inline __m256i
12003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12004_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12005 int __B)
936c0fe4 12006{
6b62f323
JJ
12007 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12008 (__v4di) __W,
12009 (__mmask8) __U);
936c0fe4
AI
12010}
12011
6b62f323
JJ
12012extern __inline __m256i
12013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
eee5d6f5 12015{
6b62f323
JJ
12016 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12017 (__v4di)
12018 _mm256_setzero_si256 (),
12019 (__mmask8) __U);
eee5d6f5
AI
12020}
12021
6b62f323
JJ
12022extern __inline __m256d
12023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12024_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12025 const int __imm)
936c0fe4 12026{
6b62f323
JJ
12027 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12028 (__v4df) __W,
12029 (__mmask8) __U);
936c0fe4
AI
12030}
12031
6b62f323
JJ
12032extern __inline __m256d
12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
eee5d6f5 12035{
6b62f323
JJ
12036 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12037 (__v4df)
12038 _mm256_setzero_pd (),
12039 (__mmask8) __U);
eee5d6f5
AI
12040}
12041
6b62f323
JJ
12042extern __inline __m256d
12043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12045 const int __C)
936c0fe4 12046{
6b62f323
JJ
12047 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12048 (__v4df) __W,
12049 (__mmask8) __U);
936c0fe4
AI
12050}
12051
6b62f323
JJ
12052extern __inline __m256d
12053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
eee5d6f5 12055{
6b62f323
JJ
12056 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12057 (__v4df)
12058 _mm256_setzero_pd (),
12059 (__mmask8) __U);
eee5d6f5
AI
12060}
12061
6b62f323
JJ
12062extern __inline __m128d
12063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12065 const int __C)
936c0fe4 12066{
6b62f323
JJ
12067 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12068 (__v2df) __W,
12069 (__mmask8) __U);
936c0fe4
AI
12070}
12071
6b62f323
JJ
12072extern __inline __m128d
12073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12074_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
eee5d6f5 12075{
6b62f323
JJ
12076 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12077 (__v2df)
12078 _mm_setzero_pd (),
12079 (__mmask8) __U);
eee5d6f5
AI
12080}
12081
6b62f323
JJ
12082extern __inline __m256
12083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12085 const int __C)
936c0fe4 12086{
6b62f323
JJ
12087 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12088 (__v8sf) __W,
12089 (__mmask8) __U);
936c0fe4
AI
12090}
12091
6b62f323
JJ
12092extern __inline __m256
12093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12094_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
eee5d6f5 12095{
6b62f323
JJ
12096 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12097 (__v8sf)
12098 _mm256_setzero_ps (),
12099 (__mmask8) __U);
eee5d6f5
AI
12100}
12101
6b62f323
JJ
12102extern __inline __m128
12103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12104_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12105 const int __C)
936c0fe4 12106{
6b62f323
JJ
12107 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12108 (__v4sf) __W,
12109 (__mmask8) __U);
936c0fe4
AI
12110}
12111
6b62f323
JJ
12112extern __inline __m128
12113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12114_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
eee5d6f5 12115{
6b62f323
JJ
12116 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12117 (__v4sf)
12118 _mm_setzero_ps (),
12119 (__mmask8) __U);
eee5d6f5
AI
12120}
12121
6b62f323
JJ
12122extern __inline __m256d
12123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
936c0fe4 12125{
6b62f323
JJ
12126 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12127 (__v4df) __W,
12128 (__mmask8) __U);
936c0fe4
AI
12129}
12130
6b62f323
JJ
12131extern __inline __m256
12132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12133_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
eee5d6f5 12134{
6b62f323
JJ
12135 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12136 (__v8sf) __W,
12137 (__mmask8) __U);
eee5d6f5
AI
12138}
12139
6b62f323
JJ
12140extern __inline __m256i
12141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12142_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
936c0fe4 12143{
6b62f323
JJ
12144 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12145 (__v4di) __W,
12146 (__mmask8) __U);
936c0fe4
AI
12147}
12148
6b62f323
JJ
12149extern __inline __m256i
12150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12151_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
eee5d6f5 12152{
6b62f323
JJ
12153 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12154 (__v8si) __W,
12155 (__mmask8) __U);
eee5d6f5
AI
12156}
12157
6b62f323
JJ
12158extern __inline __m128d
12159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12160_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
936c0fe4 12161{
6b62f323
JJ
12162 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12163 (__v2df) __W,
12164 (__mmask8) __U);
936c0fe4
AI
12165}
12166
6b62f323
JJ
12167extern __inline __m128
12168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12169_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
eee5d6f5 12170{
6b62f323
JJ
12171 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12172 (__v4sf) __W,
12173 (__mmask8) __U);
eee5d6f5
AI
12174}
12175
6b62f323
JJ
12176extern __inline __m128i
12177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12178_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
936c0fe4 12179{
6b62f323
JJ
12180 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12181 (__v2di) __W,
12182 (__mmask8) __U);
936c0fe4
AI
12183}
12184
6b62f323
JJ
12185extern __inline __m128i
12186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
eee5d6f5 12188{
6b62f323
JJ
12189 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12190 (__v4si) __W,
12191 (__mmask8) __U);
eee5d6f5
AI
12192}
12193
936c0fe4 12194extern __inline __mmask8
6b62f323
JJ
12195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12196_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12197{
6b62f323
JJ
12198 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12199 (__v4di) __Y, __P,
12200 (__mmask8) -1);
936c0fe4
AI
12201}
12202
eee5d6f5 12203extern __inline __mmask8
6b62f323
JJ
12204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12205_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12206{
6b62f323
JJ
12207 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12208 (__v8si) __Y, __P,
12209 (__mmask8) -1);
eee5d6f5
AI
12210}
12211
936c0fe4 12212extern __inline __mmask8
6b62f323
JJ
12213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12214_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12215{
6b62f323
JJ
12216 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12217 (__v4di) __Y, __P,
c42b0bdf 12218 (__mmask8) -1);
936c0fe4
AI
12219}
12220
eee5d6f5 12221extern __inline __mmask8
6b62f323
JJ
12222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12223_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12224{
6b62f323
JJ
12225 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12226 (__v8si) __Y, __P,
12227 (__mmask8) -1);
eee5d6f5
AI
12228}
12229
936c0fe4 12230extern __inline __mmask8
6b62f323
JJ
12231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12232_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
936c0fe4 12233{
6b62f323
JJ
12234 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12235 (__v4df) __Y, __P,
c42b0bdf 12236 (__mmask8) -1);
936c0fe4
AI
12237}
12238
eee5d6f5 12239extern __inline __mmask8
6b62f323
JJ
12240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12241_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
eee5d6f5 12242{
6b62f323
JJ
12243 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12244 (__v8sf) __Y, __P,
12245 (__mmask8) -1);
eee5d6f5
AI
12246}
12247
936c0fe4 12248extern __inline __mmask8
6b62f323
JJ
12249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12250_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12251 const int __P)
936c0fe4 12252{
6b62f323
JJ
12253 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12254 (__v4di) __Y, __P,
12255 (__mmask8) __U);
936c0fe4
AI
12256}
12257
eee5d6f5 12258extern __inline __mmask8
6b62f323
JJ
12259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12260_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12261 const int __P)
eee5d6f5 12262{
6b62f323
JJ
12263 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12264 (__v8si) __Y, __P,
12265 (__mmask8) __U);
eee5d6f5
AI
12266}
12267
936c0fe4 12268extern __inline __mmask8
6b62f323
JJ
12269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12270_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12271 const int __P)
936c0fe4 12272{
6b62f323
JJ
12273 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12274 (__v4di) __Y, __P,
12275 (__mmask8) __U);
936c0fe4
AI
12276}
12277
eee5d6f5 12278extern __inline __mmask8
6b62f323
JJ
12279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12281 const int __P)
eee5d6f5 12282{
6b62f323
JJ
12283 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12284 (__v8si) __Y, __P,
12285 (__mmask8) __U);
eee5d6f5
AI
12286}
12287
936c0fe4 12288extern __inline __mmask8
6b62f323
JJ
12289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12290_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12291 const int __P)
936c0fe4 12292{
6b62f323
JJ
12293 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12294 (__v4df) __Y, __P,
12295 (__mmask8) __U);
936c0fe4
AI
12296}
12297
eee5d6f5 12298extern __inline __mmask8
6b62f323
JJ
12299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12301 const int __P)
eee5d6f5 12302{
6b62f323
JJ
12303 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12304 (__v8sf) __Y, __P,
12305 (__mmask8) __U);
eee5d6f5
AI
12306}
12307
936c0fe4 12308extern __inline __mmask8
6b62f323
JJ
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12311{
6b62f323
JJ
12312 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12313 (__v2di) __Y, __P,
c42b0bdf 12314 (__mmask8) -1);
936c0fe4
AI
12315}
12316
eee5d6f5 12317extern __inline __mmask8
6b62f323
JJ
12318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5
AI
12320{
12321 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
6b62f323
JJ
12322 (__v4si) __Y, __P,
12323 (__mmask8) -1);
eee5d6f5
AI
12324}
12325
936c0fe4 12326extern __inline __mmask8
6b62f323
JJ
12327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12328_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12329{
6b62f323
JJ
12330 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12331 (__v2di) __Y, __P,
12332 (__mmask8) -1);
936c0fe4
AI
12333}
12334
eee5d6f5 12335extern __inline __mmask8
6b62f323
JJ
12336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12337_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5 12338{
6b62f323
JJ
12339 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12340 (__v4si) __Y, __P,
12341 (__mmask8) -1);
eee5d6f5
AI
12342}
12343
936c0fe4 12344extern __inline __mmask8
6b62f323
JJ
12345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
936c0fe4 12347{
6b62f323
JJ
12348 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12349 (__v2df) __Y, __P,
12350 (__mmask8) -1);
936c0fe4
AI
12351}
12352
eee5d6f5 12353extern __inline __mmask8
6b62f323
JJ
12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
eee5d6f5 12356{
6b62f323
JJ
12357 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12358 (__v4sf) __Y, __P,
12359 (__mmask8) -1);
eee5d6f5
AI
12360}
12361
936c0fe4 12362extern __inline __mmask8
6b62f323
JJ
12363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12364_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12365 const int __P)
936c0fe4
AI
12366{
12367 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
6b62f323
JJ
12368 (__v2di) __Y, __P,
12369 (__mmask8) __U);
936c0fe4
AI
12370}
12371
eee5d6f5 12372extern __inline __mmask8
6b62f323
JJ
12373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12374_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12375 const int __P)
eee5d6f5 12376{
6b62f323
JJ
12377 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12378 (__v4si) __Y, __P,
12379 (__mmask8) __U);
eee5d6f5
AI
12380}
12381
936c0fe4 12382extern __inline __mmask8
6b62f323
JJ
12383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12384_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12385 const int __P)
936c0fe4 12386{
6b62f323
JJ
12387 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12388 (__v2di) __Y, __P,
12389 (__mmask8) __U);
936c0fe4
AI
12390}
12391
eee5d6f5 12392extern __inline __mmask8
6b62f323
JJ
12393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12394_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12395 const int __P)
eee5d6f5 12396{
6b62f323
JJ
12397 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12398 (__v4si) __Y, __P,
12399 (__mmask8) __U);
eee5d6f5
AI
12400}
12401
936c0fe4 12402extern __inline __mmask8
6b62f323
JJ
12403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12404_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12405 const int __P)
936c0fe4 12406{
6b62f323
JJ
12407 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12408 (__v2df) __Y, __P,
12409 (__mmask8) __U);
936c0fe4
AI
12410}
12411
eee5d6f5 12412extern __inline __mmask8
6b62f323
JJ
12413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12414_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12415 const int __P)
eee5d6f5 12416{
6b62f323
JJ
12417 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12418 (__v4sf) __Y, __P,
12419 (__mmask8) __U);
eee5d6f5
AI
12420}
12421
6b62f323
JJ
12422extern __inline __m256d
12423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12424_mm256_permutex_pd (__m256d __X, const int __M)
936c0fe4 12425{
6b62f323
JJ
12426 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12427 (__v4df)
12428 _mm256_undefined_pd (),
12429 (__mmask8) -1);
936c0fe4
AI
12430}
12431
12432#else
12433#define _mm256_permutex_pd(X, M) \
12434 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
a25a7887
JJ
12435 (__v4df)(__m256d) \
12436 _mm256_undefined_pd (), \
936c0fe4
AI
12437 (__mmask8)-1))
12438
395a191d
SP
12439#define _mm256_permutex_epi64(X, I) \
12440 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12441 (int)(I), \
12442 (__v4di)(__m256i) \
12443 (_mm256_setzero_si256 ()),\
12444 (__mmask8) -1))
12445
936c0fe4
AI
12446#define _mm256_maskz_permutex_epi64(M, X, I) \
12447 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12448 (int)(I), \
12449 (__v4di)(__m256i) \
a25a7887 12450 (_mm256_setzero_si256 ()),\
936c0fe4
AI
12451 (__mmask8)(M)))
12452
12453#define _mm256_mask_permutex_epi64(W, M, X, I) \
12454 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12455 (int)(I), \
12456 (__v4di)(__m256i)(W), \
12457 (__mmask8)(M)))
12458
12459#define _mm256_insertf32x4(X, Y, C) \
12460 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12461 (__v4sf)(__m128) (Y), (int) (C), \
a25a7887 12462 (__v8sf)(__m256)_mm256_setzero_ps (), \
936c0fe4
AI
12463 (__mmask8)-1))
12464
12465#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
12466 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12467 (__v4sf)(__m128) (Y), (int) (C), \
12468 (__v8sf)(__m256)(W), \
12469 (__mmask8)(U)))
12470
12471#define _mm256_maskz_insertf32x4(U, X, Y, C) \
12472 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12473 (__v4sf)(__m128) (Y), (int) (C), \
a25a7887 12474 (__v8sf)(__m256)_mm256_setzero_ps (), \
936c0fe4
AI
12475 (__mmask8)(U)))
12476
12477#define _mm256_inserti32x4(X, Y, C) \
12478 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12479 (__v4si)(__m128i) (Y), (int) (C), \
a25a7887 12480 (__v8si)(__m256i)_mm256_setzero_si256 (), \
936c0fe4
AI
12481 (__mmask8)-1))
12482
12483#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12484 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12485 (__v4si)(__m128i) (Y), (int) (C), \
12486 (__v8si)(__m256i)(W), \
12487 (__mmask8)(U)))
12488
12489#define _mm256_maskz_inserti32x4(U, X, Y, C) \
12490 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12491 (__v4si)(__m128i) (Y), (int) (C), \
a25a7887 12492 (__v8si)(__m256i)_mm256_setzero_si256 (), \
936c0fe4
AI
12493 (__mmask8)(U)))
12494
12495#define _mm256_extractf32x4_ps(X, C) \
12496 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12497 (int) (C), \
a25a7887 12498 (__v4sf)(__m128)_mm_setzero_ps (), \
936c0fe4
AI
12499 (__mmask8)-1))
12500
12501#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12502 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12503 (int) (C), \
12504 (__v4sf)(__m128)(W), \
12505 (__mmask8)(U)))
12506
12507#define _mm256_maskz_extractf32x4_ps(U, X, C) \
12508 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12509 (int) (C), \
a25a7887 12510 (__v4sf)(__m128)_mm_setzero_ps (), \
936c0fe4
AI
12511 (__mmask8)(U)))
12512
12513#define _mm256_extracti32x4_epi32(X, C) \
12514 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12515 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12516
12517#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12518 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12519 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12520
12521#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12522 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12523 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
12524
12525#define _mm256_shuffle_i64x2(X, Y, C) \
12526 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12527 (__v4di)(__m256i)(Y), (int)(C), \
12528 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12529 (__mmask8)-1))
12530
12531#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12532 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12533 (__v4di)(__m256i)(Y), (int)(C), \
12534 (__v4di)(__m256i)(W),\
12535 (__mmask8)(U)))
12536
12537#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12538 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12539 (__v4di)(__m256i)(Y), (int)(C), \
12540 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12541 (__mmask8)(U)))
12542
12543#define _mm256_shuffle_i32x4(X, Y, C) \
12544 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12545 (__v8si)(__m256i)(Y), (int)(C), \
a25a7887
JJ
12546 (__v8si)(__m256i) \
12547 _mm256_setzero_si256 (), \
936c0fe4
AI
12548 (__mmask8)-1))
12549
12550#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12551 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12552 (__v8si)(__m256i)(Y), (int)(C), \
12553 (__v8si)(__m256i)(W), \
12554 (__mmask8)(U)))
12555
12556#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12557 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12558 (__v8si)(__m256i)(Y), (int)(C), \
a25a7887
JJ
12559 (__v8si)(__m256i) \
12560 _mm256_setzero_si256 (), \
936c0fe4
AI
12561 (__mmask8)(U)))
12562
12563#define _mm256_shuffle_f64x2(X, Y, C) \
12564 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12565 (__v4df)(__m256d)(Y), (int)(C), \
a25a7887 12566 (__v4df)(__m256d)_mm256_setzero_pd (),\
936c0fe4
AI
12567 (__mmask8)-1))
12568
12569#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12570 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12571 (__v4df)(__m256d)(Y), (int)(C), \
12572 (__v4df)(__m256d)(W), \
12573 (__mmask8)(U)))
12574
12575#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12576 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12577 (__v4df)(__m256d)(Y), (int)(C), \
a25a7887 12578 (__v4df)(__m256d)_mm256_setzero_pd( ),\
936c0fe4
AI
12579 (__mmask8)(U)))
12580
12581#define _mm256_shuffle_f32x4(X, Y, C) \
12582 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12583 (__v8sf)(__m256)(Y), (int)(C), \
a25a7887 12584 (__v8sf)(__m256)_mm256_setzero_ps (), \
936c0fe4
AI
12585 (__mmask8)-1))
12586
12587#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12588 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12589 (__v8sf)(__m256)(Y), (int)(C), \
12590 (__v8sf)(__m256)(W), \
12591 (__mmask8)(U)))
12592
12593#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12594 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12595 (__v8sf)(__m256)(Y), (int)(C), \
a25a7887 12596 (__v8sf)(__m256)_mm256_setzero_ps (), \
936c0fe4
AI
12597 (__mmask8)(U)))
12598
12599#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12600 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12601 (__v4df)(__m256d)(B), (int)(C), \
12602 (__v4df)(__m256d)(W), \
12603 (__mmask8)(U)))
12604
12605#define _mm256_maskz_shuffle_pd(U, A, B, C) \
12606 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12607 (__v4df)(__m256d)(B), (int)(C), \
a25a7887
JJ
12608 (__v4df)(__m256d) \
12609 _mm256_setzero_pd (), \
936c0fe4
AI
12610 (__mmask8)(U)))
12611
12612#define _mm_mask_shuffle_pd(W, U, A, B, C) \
12613 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12614 (__v2df)(__m128d)(B), (int)(C), \
12615 (__v2df)(__m128d)(W), \
12616 (__mmask8)(U)))
12617
12618#define _mm_maskz_shuffle_pd(U, A, B, C) \
12619 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12620 (__v2df)(__m128d)(B), (int)(C), \
a25a7887 12621 (__v2df)(__m128d)_mm_setzero_pd (), \
936c0fe4
AI
12622 (__mmask8)(U)))
12623
12624#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12625 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12626 (__v8sf)(__m256)(B), (int)(C), \
12627 (__v8sf)(__m256)(W), \
12628 (__mmask8)(U)))
12629
12630#define _mm256_maskz_shuffle_ps(U, A, B, C) \
12631 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12632 (__v8sf)(__m256)(B), (int)(C), \
a25a7887 12633 (__v8sf)(__m256)_mm256_setzero_ps (),\
936c0fe4
AI
12634 (__mmask8)(U)))
12635
12636#define _mm_mask_shuffle_ps(W, U, A, B, C) \
12637 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12638 (__v4sf)(__m128)(B), (int)(C), \
12639 (__v4sf)(__m128)(W), \
12640 (__mmask8)(U)))
12641
12642#define _mm_maskz_shuffle_ps(U, A, B, C) \
12643 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12644 (__v4sf)(__m128)(B), (int)(C), \
a25a7887 12645 (__v4sf)(__m128)_mm_setzero_ps (), \
936c0fe4
AI
12646 (__mmask8)(U)))
12647
/* Macro forms of the fixupimm intrinsics (256-bit and 128-bit, pd and ps).
   X is cast to the floating-point vector type, Y to the integer vector
   type of matching element width, and C to int.
   NOTE(review): the unmasked forms call the *_mask builtin with four
   arguments while the _mask_ forms call the same builtin with five;
   confirm the argument lists against the builtin prototypes.  */
#define _mm256_fixupimm_pd(X, Y, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm256_mask_fixupimm_pd(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm256_fixupimm_ps(X, Y, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__mmask8)(-1)))


#define _mm256_mask_fixupimm_ps(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
     (__v8sf)(__m256)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm_fixupimm_pd(X, Y, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(C), \
     (__mmask8)(-1)))


#define _mm_mask_fixupimm_pd(W, U, X, Y, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(C), \
     (__mmask8)(U)))

#define _mm_fixupimm_ps(X, Y, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(W, U, X, Y, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__mmask8)(U)))
12717
/* Macro forms of the masked srli intrinsics.  Each expands to the
   matching __builtin_ia32_psrl{d,q}i{128,256}_mask call with operands
   (A, (int) B, third operand, U); the third operand is W for the _mask_
   forms and a zero vector for the _maskz_ forms.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)_mm_setzero_si128 (), \
     (__mmask8)(U)))
936c0fe4
AI
12749
/* Macro forms of the masked slli intrinsics.  Each expands to the
   matching __builtin_ia32_psll{d,q}i{128,256}_mask call with operands
   (X, (int) C, third operand, U); the third operand is W for the _mask_
   forms and a zero vector for the _maskz_ forms.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), \
     (int)(C), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), \
     (int)(C), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
12789
/* Macro forms of the ternarylogic intrinsics.  The unmasked and _mask_
   forms expand to __builtin_ia32_pternlog{q,d}{128,256}_mask (with mask
   (__mmask8)-1 and U respectively); the _maskz_ forms expand to the
   corresponding *_maskz builtin.  I is passed as int.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(I), \
     (__mmask8)(U)))
12837
/* Macro forms of the roundscale intrinsics.  All three variants of each
   width/type expand to the same __builtin_ia32_rndscale*_mask builtin
   with operands (A, (int) B, third operand, mask): the unmasked form
   passes a zero vector and (__mmask8)-1, the _mask_ form passes W and U,
   and the _maskz_ form passes a zero vector and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), \
     (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))

#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), \
     (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), \
     (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))
936c0fe4
AI
12885
/* Macro forms of the getmant intrinsics.  The two selector arguments are
   packed into a single int immediate as ((C) << 2) | (B) before being
   handed to the __builtin_ia32_getmant*_mask builtin.  The unmasked form
   passes a zero vector and (__mmask8)-1, the _mask_ form passes W and U,
   and the _maskz_ form passes a zero vector and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), \
     (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))

#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), \
     (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))
12957
/* Macro forms of the masked gather intrinsics.  Each expands to the
   matching __builtin_ia32_gather3{si,di}v* builtin with operands
   (V1OLD, ADDR, INDEX, MASK, SCALE), cast respectively to the result
   vector type, void const *, the index vector type, __mmask8 and int.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + off' or `cond ? a : b' is cast as
   a whole rather than having the cast bind to its first operand only.
   The whole expansion is parenthesized as well, so a postfix operator
   applied to the macro result cannot slip underneath the leading result
   cast.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
     (__v8sf)(__m256)(V1OLD), \
     (void const *)(ADDR), \
     (__v8si)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
     (__v4sf)(__m128)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
     (__v4df)(__m256d)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
     (__v2df)(__m128d)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ( \
     (__v4sf)(__m128)(V1OLD), \
     (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ( \
     (__v4sf)(__m128)(V1OLD), \
     (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ( \
     (__v4df)(__m256d)(V1OLD), \
     (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ( \
     (__v2df)(__m128d)(V1OLD), \
     (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ( \
     (__v8si)(__m256i)(V1OLD), \
     (void const *)(ADDR), \
     (__v8si)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ( \
     (__v4si)(__m128i)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ( \
     (__v4di)(__m256i)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ( \
     (__v2di)(__m128i)(V1OLD), \
     (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ( \
     (__v4si)(__m128i)(V1OLD), \
     (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ( \
     (__v4si)(__m128i)(V1OLD), \
     (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ( \
     (__v4di)(__m256i)(V1OLD), \
     (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ( \
     (__v2di)(__m128i)(V1OLD), \
     (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), \
     (__mmask8)(MASK), \
     (int)(SCALE)))
13053
/* Macro forms of the scatter intrinsics.  Each expands to the matching
   __builtin_ia32_scatter{si,di}v* builtin with operands
   (ADDR, mask, INDEX, V1, SCALE), cast respectively to void *, __mmask8,
   the index vector type, the data vector type and int.  The unmasked
   forms pass the constant mask (__mmask8)0xFF; the _mask_ forms pass
   MASK.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + off' or `cond ? a : b' is cast as
   a whole rather than having the cast bind to its first operand only.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))
13213
/* Macro forms of the masked shuffle_epi32 intrinsics.  Each expands to
   __builtin_ia32_pshufd{128,256}_mask with operands
   (X, (int) C, third operand, U); the third operand is W for the _mask_
   forms and a zero vector for the _maskz_ forms.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
13234
/* Macro forms of the rol/ror intrinsics for 64-bit and 32-bit elements.
   All three variants of each operation expand to the same
   __builtin_ia32_pro{l,r}{q,d}{128,256}_mask builtin with operands
   (A, (int) B, third operand, mask): the unmasked form passes a zero
   vector and (__mmask8)-1, the _mask_ form passes W and U, and the
   _maskz_ form passes a zero vector and U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm256_ror_epi64(A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm256_rol_epi32(A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))

#define _mm256_ror_epi32(A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
13355
13356#define _mm256_alignr_epi32(X, Y, C) \
13357 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13358 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
13359
13360#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
13361 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13362 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
13363
13364#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
13365 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13366 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
13367 (__mmask8)(U)))
13368
13369#define _mm256_alignr_epi64(X, Y, C) \
13370 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13371 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
13372
13373#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
13374 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13375 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
13376
13377#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
13378 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13379 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
13380 (__mmask8)(U)))
13381
/* Unmasked form: forwards X, Y and the immediate C to
   __builtin_ia32_alignd128_mask; X is reused as the fourth builtin
   argument and the mask is all ones.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (C), \
      (__v4si) (__m128i) (X), \
      (__mmask8) -1))
13385
/* Masked form: forwards X, Y and the immediate C to
   __builtin_ia32_alignd128_mask, with W as the fourth builtin argument
   and U as the mask.  */
#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (C), \
      (__v4si) (__m128i) (W), \
      (__mmask8) (U)))
13389
/* Zero-masking form: forwards X, Y and the immediate C to
   __builtin_ia32_alignd128_mask, with a zero vector as the fourth
   builtin argument and U as the mask.  */
#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (C), \
      (__v4si) (__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
13394
/* Unmasked form: forwards X, Y and the immediate C to
   __builtin_ia32_alignq128_mask; X is reused as the fourth builtin
   argument and the mask is all ones.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (C), \
      (__v2di) (__m128i) (X), \
      (__mmask8) -1))
13398
/* Masked form: forwards X, Y and the immediate C to
   __builtin_ia32_alignq128_mask, with W as the fourth builtin argument
   and U as the mask, in line with _mm_mask_alignr_epi32 and
   _mm256_mask_alignr_epi64.

   The previous expansion never referenced W or U: it passed X as the
   fourth argument and an all-ones mask, which made this macro
   token-for-token equivalent to the unmasked _mm_alignr_epi64.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (C), \
      (__v2di) (__m128i) (W), \
      (__mmask8) (U)))
13402
/* Zero-masking form: forwards X, Y and the immediate C to
   __builtin_ia32_alignq128_mask, with a zero vector as the fourth
   builtin argument and U as the mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (C), \
      (__v2di) (__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
13407
/* Masked form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph_mask, with W as the third builtin argument
   and U as the mask.

   A is parenthesized so that the (__v4sf)(__m128) casts apply to the
   whole argument expression, not just to its first operand.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf) (__m128) (A), \
      (int) (I), \
      (__v8hi) (__m128i) (W), \
      (__mmask8) (U)))
13411
/* Zero-masking form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph_mask, with a zero vector as the third
   builtin argument and U as the mask.

   A is parenthesized so that the (__v4sf)(__m128) casts apply to the
   whole argument expression, not just to its first operand.  */
#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf) (__m128) (A), \
      (int) (I), \
      (__v8hi) (__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
936c0fe4
AI
13415
/* Masked form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph256_mask, with W as the third builtin
   argument and U as the mask.  The result is cast to __m128i.

   A is parenthesized so that the (__v8sf)(__m256) casts apply to the
   whole argument expression, not just to its first operand.  */
#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf) (__m256) (A), \
      (int) (I), \
      (__v8hi) (__m128i) (W), \
      (__mmask8) (U)))
13419
/* Zero-masking form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph256_mask, with a zero vector as the third
   builtin argument and U as the mask.  The result is cast to __m128i.

   A is parenthesized so that the (__v8sf)(__m256) casts apply to the
   whole argument expression, not just to its first operand.  */
#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf) (__m256) (A), \
      (int) (I), \
      (__v8hi) (__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
936c0fe4
AI
13423
/* Masked form: forwards A and the immediate B to
   __builtin_ia32_psradi256_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si) (__m256i) (A), \
      (int) (B), \
      (__v8si) (__m256i) (W), \
      (__mmask8) (U)))
13427
/* Zero-masking form: forwards A and the immediate B to
   __builtin_ia32_psradi256_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si) (__m256i) (A), \
      (int) (B), \
      (__v8si) _mm256_setzero_si256 (), \
      (__mmask8) (U)))
936c0fe4
AI
13431
/* Masked form: forwards A and the immediate B to
   __builtin_ia32_psradi128_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si) (__m128i) (A), \
      (int) (B), \
      (__v4si) (__m128i) (W), \
      (__mmask8) (U)))
13435
/* Zero-masking form: forwards A and the immediate B to
   __builtin_ia32_psradi128_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si) (__m128i) (A), \
      (int) (B), \
      (__v4si) _mm_setzero_si128 (), \
      (__mmask8) (U)))
936c0fe4
AI
13439
/* Unmasked form: forwards A and the immediate B to
   __builtin_ia32_psraqi256_mask, with a zero vector as the third
   builtin argument and an all-ones mask.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di) (__m256i) (A), \
      (int) (B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8) -1))
13443
/* Masked form: forwards A and the immediate B to
   __builtin_ia32_psraqi256_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di) (__m256i) (A), \
      (int) (B), \
      (__v4di) (__m256i) (W), \
      (__mmask8) (U)))
13447
/* Zero-masking form: forwards A and the immediate B to
   __builtin_ia32_psraqi256_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di) (__m256i) (A), \
      (int) (B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8) (U)))
13451
/* Unmasked form: forwards A and the immediate B to
   __builtin_ia32_psraqi128_mask, with a zero vector as the third
   builtin argument and an all-ones mask.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di) (__m128i) (A), \
      (int) (B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8) -1))
936c0fe4
AI
13455
/* Masked form: forwards A and the immediate B to
   __builtin_ia32_psraqi128_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di) (__m128i) (A), \
      (int) (B), \
      (__v2di) (__m128i) (W), \
      (__mmask8) (U)))
13459
/* Zero-masking form: forwards A and the immediate B to
   __builtin_ia32_psraqi128_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di) (__m128i) (A), \
      (int) (B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8) (U)))
936c0fe4
AI
13463
/* Masked form: forwards A and the immediate B to
   __builtin_ia32_permdf256_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df) (__m256d) (A), \
      (int) (B), \
      (__v4df) (__m256d) (W), \
      (__mmask8) (U)))
13467
/* Zero-masking form: forwards A and the immediate B to
   __builtin_ia32_permdf256_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df) (__m256d) (A), \
      (int) (B), \
      (__v4df) (__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))
936c0fe4
AI
13471
/* Masked form: forwards X and the immediate C to
   __builtin_ia32_vpermilpd256_mask, with W as the third builtin
   argument and U as the mask.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df) (__m256d) (X), \
      (int) (C), \
      (__v4df) (__m256d) (W), \
      (__mmask8) (U)))
13476
/* Zero-masking form: forwards X and the immediate C to
   __builtin_ia32_vpermilpd256_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df) (__m256d) (X), \
      (int) (C), \
      (__v4df) (__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))
13481
/* Masked form: forwards X and the immediate C to
   __builtin_ia32_vpermilps256_mask, with W as the third builtin
   argument and U as the mask.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf) (__m256) (X), \
      (int) (C), \
      (__v8sf) (__m256) (W), \
      (__mmask8) (U)))
13485
/* Zero-masking form: forwards X and the immediate C to
   __builtin_ia32_vpermilps256_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf) (__m256) (X), \
      (int) (C), \
      (__v8sf) (__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))
13490
/* Masked form: forwards X and the immediate C to
   __builtin_ia32_vpermilpd_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df) (__m128d) (X), \
      (int) (C), \
      (__v2df) (__m128d) (W), \
      (__mmask8) (U)))
13494
/* Zero-masking form: forwards X and the immediate C to
   __builtin_ia32_vpermilpd_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df) (__m128d) (X), \
      (int) (C), \
      (__v2df) (__m128d) _mm_setzero_pd (), \
      (__mmask8) (U)))
13499
/* Masked form: forwards X and the immediate C to
   __builtin_ia32_vpermilps_mask, with W as the third builtin argument
   and U as the mask.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf) (__m128) (X), \
      (int) (C), \
      (__v4sf) (__m128) (W), \
      (__mmask8) (U)))
13503
/* Zero-masking form: forwards X and the immediate C to
   __builtin_ia32_vpermilps_mask, with a zero vector as the third
   builtin argument and U as the mask.  */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf) (__m128) (X), \
      (int) (C), \
      (__v4sf) (__m128) _mm_setzero_ps (), \
      (__mmask8) (U)))
13508
/* Forwards __A and __W (cast to __v4df) and the mask __U to
   __builtin_ia32_blendmpd_256_mask; the result is cast to __m256d.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
      (__v4df) (__A), \
      (__v4df) (__W), \
      (__mmask8) (__U)))
13513
/* Forwards __A and __W (cast to __v8sf) and the mask __U to
   __builtin_ia32_blendmps_256_mask; the result is cast to __m256.  */
#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
      (__v8sf) (__A), \
      (__v8sf) (__W), \
      (__mmask8) (__U)))
13518
/* Forwards __A and __W (cast to __v4di) and the mask __U to
   __builtin_ia32_blendmq_256_mask; the result is cast to __m256i.  */
#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
      (__v4di) (__A), \
      (__v4di) (__W), \
      (__mmask8) (__U)))
13523
/* Forwards __A and __W (cast to __v8si) and the mask __U to
   __builtin_ia32_blendmd_256_mask; the result is cast to __m256i.  */
#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
      (__v8si) (__A), \
      (__v8si) (__W), \
      (__mmask8) (__U)))
13528
/* Forwards __A and __W (cast to __v2df) and the mask __U to
   __builtin_ia32_blendmpd_128_mask; the result is cast to __m128d.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
      (__v2df) (__A), \
      (__v2df) (__W), \
      (__mmask8) (__U)))
13533
/* Forwards __A and __W (cast to __v4sf) and the mask __U to
   __builtin_ia32_blendmps_128_mask; the result is cast to __m128.  */
#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
      (__v4sf) (__A), \
      (__v4sf) (__W), \
      (__mmask8) (__U)))
13538
/* Forwards __A and __W (cast to __v2di) and the mask __U to
   __builtin_ia32_blendmq_128_mask; the result is cast to __m128i.  */
#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
      (__v2di) (__A), \
      (__v2di) (__W), \
      (__mmask8) (__U)))
13543
/* Forwards __A and __W (cast to __v4si) and the mask __U to
   __builtin_ia32_blendmd_128_mask; the result is cast to __m128i.  */
#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
      (__v4si) (__A), \
      (__v4si) (__W), \
      (__mmask8) (__U)))
13548
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpd256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si) (__m256i) (X), \
      (__v8si) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13553
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpq256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di) (__m256i) (X), \
      (__v4di) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13558
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpd256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si) (__m256i) (X), \
      (__v8si) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13563
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpq256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di) (__m256i) (X), \
      (__v4di) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13568
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmppd256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df) (__m256d) (X), \
      (__v4df) (__m256d) (Y), \
      (int) (P), \
      (__mmask8) -1))
13573
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpps256_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf) (__m256) (X), \
      (__v8sf) (__m256) (Y), \
      (int) (P), \
      (__mmask8) -1))
13578
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpq256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di) (__m256i) (X), \
      (__v4di) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13583
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpd256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si) (__m256i) (X), \
      (__v8si) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13588
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpq256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di) (__m256i) (X), \
      (__v4di) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13593
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpd256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si) (__m256i) (X), \
      (__v8si) (__m256i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13598
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmppd256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df) (__m256d) (X), \
      (__v4df) (__m256d) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13603
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpps256_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf) (__m256) (X), \
      (__v8sf) (__m256) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13608
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpq128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13613
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpd128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13618
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpq128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13623
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpd128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) -1))
13628
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmppd128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (int) (P), \
      (__mmask8) -1))
13633
/* Unmasked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpps128_mask with an all-ones mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (int) (P), \
      (__mmask8) -1))
13638
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpq128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13643
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpd128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13648
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpq128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di) (__m128i) (X), \
      (__v2di) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13653
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_ucmpd128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si) (__m128i) (X), \
      (__v4si) (__m128i) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13658
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmppd128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df) (__m128d) (X), \
      (__v2df) (__m128d) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13663
/* Masked compare: forwards X, Y and the predicate P to
   __builtin_ia32_cmpps128_mask with M as the mask argument; the result
   is cast to __mmask8.  */
#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf) (__m128) (X), \
      (__v4sf) (__m128) (Y), \
      (int) (P), \
      (__mmask8) (M)))
13668
13669#endif
13670
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   B is passed first and A second.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
936c0fe4
AI
13672
/* If this header had to enable avx512vl itself (marked by
   __DISABLE_AVX512VL__ next to the push_options at the top of the
   file), drop the marker and restore the saved target options.  */
#if defined (__DISABLE_AVX512VL__)
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */
13677
13678#endif /* _AVX512VLINTRIN_H_INCLUDED */