]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlintrin.h
Daily bump.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
CommitLineData
85ec4feb 1/* Copyright (C) 2014-2018 Free Software Foundation, Inc.
936c0fe4
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
936c0fe4
AI
31#ifndef __AVX512VL__
32#pragma GCC push_options
33#pragma GCC target("avx512vl")
34#define __DISABLE_AVX512VL__
35#endif /* __AVX512VL__ */
36
/* Mask type used internally by the intrinsics; an alias for unsigned int.  */
typedef unsigned __mmask32;
39
40extern __inline __m256d
41__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43{
44 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45 (__v4df) __W,
46 (__mmask8) __U);
47}
48
49extern __inline __m256d
50__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52{
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df)
55 _mm256_setzero_pd (),
56 (__mmask8) __U);
57}
58
59extern __inline __m128d
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62{
63 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64 (__v2df) __W,
65 (__mmask8) __U);
66}
67
68extern __inline __m128d
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71{
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df)
74 _mm_setzero_pd (),
75 (__mmask8) __U);
76}
77
78extern __inline __m256d
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81{
82 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83 (__v4df) __W,
84 (__mmask8) __U);
85}
86
87extern __inline __m256d
88__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90{
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df)
93 _mm256_setzero_pd (),
94 (__mmask8) __U);
95}
96
97extern __inline __m128d
98__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100{
101 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102 (__v2df) __W,
103 (__mmask8) __U);
104}
105
106extern __inline __m128d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109{
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df)
112 _mm_setzero_pd (),
113 (__mmask8) __U);
114}
115
116extern __inline void
117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119{
120 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121 (__v4df) __A,
122 (__mmask8) __U);
123}
124
125extern __inline void
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128{
129 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130 (__v2df) __A,
131 (__mmask8) __U);
132}
133
134extern __inline __m256
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137{
138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139 (__v8sf) __W,
140 (__mmask8) __U);
141}
142
143extern __inline __m256
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146{
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf)
149 _mm256_setzero_ps (),
150 (__mmask8) __U);
151}
152
153extern __inline __m128
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156{
157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158 (__v4sf) __W,
159 (__mmask8) __U);
160}
161
162extern __inline __m128
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165{
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf)
168 _mm_setzero_ps (),
169 (__mmask8) __U);
170}
171
172extern __inline __m256
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175{
176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177 (__v8sf) __W,
178 (__mmask8) __U);
179}
180
181extern __inline __m256
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184{
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf)
187 _mm256_setzero_ps (),
188 (__mmask8) __U);
189}
190
191extern __inline __m128
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194{
195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196 (__v4sf) __W,
197 (__mmask8) __U);
198}
199
200extern __inline __m128
201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203{
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf)
206 _mm_setzero_ps (),
207 (__mmask8) __U);
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213{
214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215 (__v8sf) __A,
216 (__mmask8) __U);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222{
223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224 (__v4sf) __A,
225 (__mmask8) __U);
226}
227
228extern __inline __m256i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231{
232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233 (__v4di) __W,
234 (__mmask8) __U);
235}
236
237extern __inline __m256i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240{
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di)
243 _mm256_setzero_si256 (),
244 (__mmask8) __U);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250{
251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252 (__v2di) __W,
253 (__mmask8) __U);
254}
255
256extern __inline __m128i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259{
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di)
a25a7887 262 _mm_setzero_si128 (),
936c0fe4
AI
263 (__mmask8) __U);
264}
265
266extern __inline __m256i
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269{
270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271 (__v4di) __W,
272 (__mmask8)
273 __U);
274}
275
276extern __inline __m256i
277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279{
280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281 (__v4di)
282 _mm256_setzero_si256 (),
283 (__mmask8)
284 __U);
285}
286
287extern __inline __m128i
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290{
291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292 (__v2di) __W,
293 (__mmask8)
294 __U);
295}
296
297extern __inline __m128i
298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300{
301 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302 (__v2di)
a25a7887 303 _mm_setzero_si128 (),
936c0fe4
AI
304 (__mmask8)
305 __U);
306}
307
308extern __inline void
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311{
312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313 (__v4di) __A,
314 (__mmask8) __U);
315}
316
317extern __inline void
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320{
321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322 (__v2di) __A,
323 (__mmask8) __U);
324}
325
326extern __inline __m256i
327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329{
330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331 (__v8si) __W,
332 (__mmask8) __U);
333}
334
335extern __inline __m256i
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338{
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si)
341 _mm256_setzero_si256 (),
342 (__mmask8) __U);
343}
344
345extern __inline __m128i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348{
349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350 (__v4si) __W,
351 (__mmask8) __U);
352}
353
354extern __inline __m128i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357{
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si)
360 _mm_setzero_si128 (),
361 (__mmask8) __U);
362}
363
364extern __inline __m256i
365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367{
368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369 (__v8si) __W,
370 (__mmask8)
371 __U);
372}
373
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377{
378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379 (__v8si)
380 _mm256_setzero_si256 (),
381 (__mmask8)
382 __U);
383}
384
385extern __inline __m128i
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388{
389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390 (__v4si) __W,
391 (__mmask8)
392 __U);
393}
394
395extern __inline __m128i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398{
399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400 (__v4si)
401 _mm_setzero_si128 (),
402 (__mmask8)
403 __U);
404}
405
406extern __inline void
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409{
410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411 (__v8si) __A,
412 (__mmask8) __U);
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418{
419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420 (__v4si) __A,
421 (__mmask8) __U);
422}
423
936c0fe4
AI
424extern __inline __m128d
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427{
428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429 (__v2df) __B,
430 (__v2df) __W,
431 (__mmask8) __U);
432}
433
434extern __inline __m128d
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437{
438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439 (__v2df) __B,
440 (__v2df)
441 _mm_setzero_pd (),
442 (__mmask8) __U);
443}
444
445extern __inline __m256d
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448 __m256d __B)
449{
450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451 (__v4df) __B,
452 (__v4df) __W,
453 (__mmask8) __U);
454}
455
456extern __inline __m256d
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459{
460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461 (__v4df) __B,
462 (__v4df)
463 _mm256_setzero_pd (),
464 (__mmask8) __U);
465}
466
467extern __inline __m128
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 469_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
470{
471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472 (__v4sf) __B,
473 (__v4sf) __W,
474 (__mmask8) __U);
475}
476
477extern __inline __m128
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 479_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
480{
481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482 (__v4sf) __B,
483 (__v4sf)
484 _mm_setzero_ps (),
485 (__mmask8) __U);
486}
487
488extern __inline __m256
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 490_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
491{
492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493 (__v8sf) __B,
494 (__v8sf) __W,
495 (__mmask8) __U);
496}
497
498extern __inline __m256
499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 500_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
501{
502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503 (__v8sf) __B,
504 (__v8sf)
505 _mm256_setzero_ps (),
506 (__mmask8) __U);
507}
508
509extern __inline __m128d
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512{
513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514 (__v2df) __B,
515 (__v2df) __W,
516 (__mmask8) __U);
517}
518
519extern __inline __m128d
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522{
523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524 (__v2df) __B,
525 (__v2df)
526 _mm_setzero_pd (),
527 (__mmask8) __U);
528}
529
530extern __inline __m256d
531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533 __m256d __B)
534{
535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536 (__v4df) __B,
537 (__v4df) __W,
538 (__mmask8) __U);
539}
540
541extern __inline __m256d
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544{
545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546 (__v4df) __B,
547 (__v4df)
548 _mm256_setzero_pd (),
549 (__mmask8) __U);
550}
551
552extern __inline __m128
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 554_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
555{
556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557 (__v4sf) __B,
558 (__v4sf) __W,
559 (__mmask8) __U);
560}
561
562extern __inline __m128
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 564_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
565{
566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567 (__v4sf) __B,
568 (__v4sf)
569 _mm_setzero_ps (),
570 (__mmask8) __U);
571}
572
573extern __inline __m256
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 575_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
576{
577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578 (__v8sf) __B,
579 (__v8sf) __W,
580 (__mmask8) __U);
581}
582
583extern __inline __m256
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 585_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
586{
587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588 (__v8sf) __B,
589 (__v8sf)
590 _mm256_setzero_ps (),
591 (__mmask8) __U);
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm256_store_epi64 (void *__P, __m256i __A)
597{
598 *(__m256i *) __P = __A;
599}
600
601extern __inline void
602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603_mm_store_epi64 (void *__P, __m128i __A)
604{
605 *(__m128i *) __P = __A;
606}
607
608extern __inline __m256d
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611{
fc9cf6da 612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
613 (__v4df) __W,
614 (__mmask8) __U);
615}
616
617extern __inline __m256d
618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620{
fc9cf6da 621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
622 (__v4df)
623 _mm256_setzero_pd (),
624 (__mmask8) __U);
625}
626
627extern __inline __m128d
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630{
fc9cf6da 631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
632 (__v2df) __W,
633 (__mmask8) __U);
634}
635
636extern __inline __m128d
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639{
fc9cf6da 640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
641 (__v2df)
642 _mm_setzero_pd (),
643 (__mmask8) __U);
644}
645
646extern __inline void
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649{
fc9cf6da 650 __builtin_ia32_storeupd256_mask ((double *) __P,
936c0fe4
AI
651 (__v4df) __A,
652 (__mmask8) __U);
653}
654
655extern __inline void
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658{
fc9cf6da 659 __builtin_ia32_storeupd128_mask ((double *) __P,
936c0fe4
AI
660 (__v2df) __A,
661 (__mmask8) __U);
662}
663
664extern __inline __m256
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667{
fc9cf6da 668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
669 (__v8sf) __W,
670 (__mmask8) __U);
671}
672
673extern __inline __m256
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676{
fc9cf6da 677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
678 (__v8sf)
679 _mm256_setzero_ps (),
680 (__mmask8) __U);
681}
682
683extern __inline __m128
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686{
fc9cf6da 687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
688 (__v4sf) __W,
689 (__mmask8) __U);
690}
691
692extern __inline __m128
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695{
fc9cf6da 696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
697 (__v4sf)
698 _mm_setzero_ps (),
699 (__mmask8) __U);
700}
701
702extern __inline void
703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705{
fc9cf6da 706 __builtin_ia32_storeups256_mask ((float *) __P,
936c0fe4
AI
707 (__v8sf) __A,
708 (__mmask8) __U);
709}
710
711extern __inline void
712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714{
fc9cf6da 715 __builtin_ia32_storeups128_mask ((float *) __P,
936c0fe4
AI
716 (__v4sf) __A,
717 (__mmask8) __U);
718}
719
720extern __inline __m256i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723{
fc9cf6da 724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
725 (__v4di) __W,
726 (__mmask8) __U);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732{
fc9cf6da 733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
734 (__v4di)
735 _mm256_setzero_si256 (),
736 (__mmask8) __U);
737}
738
739extern __inline __m128i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742{
fc9cf6da 743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
744 (__v2di) __W,
745 (__mmask8) __U);
746}
747
748extern __inline __m128i
749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751{
fc9cf6da 752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4 753 (__v2di)
a25a7887 754 _mm_setzero_si128 (),
936c0fe4
AI
755 (__mmask8) __U);
756}
757
758extern __inline void
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
761{
fc9cf6da 762 __builtin_ia32_storedqudi256_mask ((long long *) __P,
936c0fe4
AI
763 (__v4di) __A,
764 (__mmask8) __U);
765}
766
767extern __inline void
768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
770{
fc9cf6da 771 __builtin_ia32_storedqudi128_mask ((long long *) __P,
936c0fe4
AI
772 (__v2di) __A,
773 (__mmask8) __U);
774}
775
776extern __inline __m256i
777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
779{
fc9cf6da 780 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
781 (__v8si) __W,
782 (__mmask8) __U);
783}
784
785extern __inline __m256i
786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
788{
fc9cf6da 789 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
790 (__v8si)
791 _mm256_setzero_si256 (),
792 (__mmask8) __U);
793}
794
795extern __inline __m128i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
798{
fc9cf6da 799 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
800 (__v4si) __W,
801 (__mmask8) __U);
802}
803
804extern __inline __m128i
805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
806_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
807{
fc9cf6da 808 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
809 (__v4si)
810 _mm_setzero_si128 (),
811 (__mmask8) __U);
812}
813
814extern __inline void
815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
817{
fc9cf6da 818 __builtin_ia32_storedqusi256_mask ((int *) __P,
936c0fe4
AI
819 (__v8si) __A,
820 (__mmask8) __U);
821}
822
823extern __inline void
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
826{
fc9cf6da 827 __builtin_ia32_storedqusi128_mask ((int *) __P,
936c0fe4
AI
828 (__v4si) __A,
829 (__mmask8) __U);
830}
831
832extern __inline __m256i
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
835{
836 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
837 (__v8si) __W,
838 (__mmask8) __U);
839}
840
841extern __inline __m256i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
844{
845 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
846 (__v8si)
847 _mm256_setzero_si256 (),
848 (__mmask8) __U);
849}
850
851extern __inline __m128i
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
854{
855 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
856 (__v4si) __W,
857 (__mmask8) __U);
858}
859
860extern __inline __m128i
861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
863{
864 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
865 (__v4si)
866 _mm_setzero_si128 (),
867 (__mmask8) __U);
868}
869
870extern __inline __m256i
871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872_mm256_abs_epi64 (__m256i __A)
873{
874 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
875 (__v4di)
876 _mm256_setzero_si256 (),
877 (__mmask8) -1);
878}
879
880extern __inline __m256i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
883{
884 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
885 (__v4di) __W,
886 (__mmask8) __U);
887}
888
889extern __inline __m256i
890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
892{
893 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
894 (__v4di)
895 _mm256_setzero_si256 (),
896 (__mmask8) __U);
897}
898
899extern __inline __m128i
900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901_mm_abs_epi64 (__m128i __A)
902{
903 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
904 (__v2di)
a25a7887 905 _mm_setzero_si128 (),
936c0fe4
AI
906 (__mmask8) -1);
907}
908
909extern __inline __m128i
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
912{
913 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
914 (__v2di) __W,
915 (__mmask8) __U);
916}
917
918extern __inline __m128i
919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
921{
922 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
923 (__v2di)
a25a7887 924 _mm_setzero_si128 (),
936c0fe4
AI
925 (__mmask8) __U);
926}
927
928extern __inline __m128i
929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930_mm256_cvtpd_epu32 (__m256d __A)
931{
932 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
933 (__v4si)
934 _mm_setzero_si128 (),
935 (__mmask8) -1);
936}
937
938extern __inline __m128i
939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
941{
942 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
943 (__v4si) __W,
944 (__mmask8) __U);
945}
946
947extern __inline __m128i
948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
950{
951 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
952 (__v4si)
953 _mm_setzero_si128 (),
954 (__mmask8) __U);
955}
956
957extern __inline __m128i
958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959_mm_cvtpd_epu32 (__m128d __A)
960{
961 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
962 (__v4si)
963 _mm_setzero_si128 (),
964 (__mmask8) -1);
965}
966
967extern __inline __m128i
968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
970{
971 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
972 (__v4si) __W,
973 (__mmask8) __U);
974}
975
976extern __inline __m128i
977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
979{
980 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
981 (__v4si)
982 _mm_setzero_si128 (),
983 (__mmask8) __U);
984}
985
986extern __inline __m256i
987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
989{
990 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
991 (__v8si) __W,
992 (__mmask8) __U);
993}
994
995extern __inline __m256i
996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
998{
999 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1000 (__v8si)
1001 _mm256_setzero_si256 (),
1002 (__mmask8) __U);
1003}
1004
1005extern __inline __m128i
1006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1008{
1009 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1010 (__v4si) __W,
1011 (__mmask8) __U);
1012}
1013
1014extern __inline __m128i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1017{
1018 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1019 (__v4si)
1020 _mm_setzero_si128 (),
1021 (__mmask8) __U);
1022}
1023
1024extern __inline __m256i
1025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026_mm256_cvttps_epu32 (__m256 __A)
1027{
1028 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1029 (__v8si)
1030 _mm256_setzero_si256 (),
1031 (__mmask8) -1);
1032}
1033
1034extern __inline __m256i
1035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1037{
1038 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1039 (__v8si) __W,
1040 (__mmask8) __U);
1041}
1042
1043extern __inline __m256i
1044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1046{
1047 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1048 (__v8si)
1049 _mm256_setzero_si256 (),
1050 (__mmask8) __U);
1051}
1052
1053extern __inline __m128i
1054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055_mm_cvttps_epu32 (__m128 __A)
1056{
1057 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1058 (__v4si)
1059 _mm_setzero_si128 (),
1060 (__mmask8) -1);
1061}
1062
1063extern __inline __m128i
1064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1066{
1067 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1068 (__v4si) __W,
1069 (__mmask8) __U);
1070}
1071
1072extern __inline __m128i
1073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1075{
1076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1077 (__v4si)
1078 _mm_setzero_si128 (),
1079 (__mmask8) __U);
1080}
1081
1082extern __inline __m128i
1083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1085{
1086 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1087 (__v4si) __W,
1088 (__mmask8) __U);
1089}
1090
1091extern __inline __m128i
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1094{
1095 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1096 (__v4si)
1097 _mm_setzero_si128 (),
1098 (__mmask8) __U);
1099}
1100
1101extern __inline __m128i
1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1104{
1105 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1106 (__v4si) __W,
1107 (__mmask8) __U);
1108}
1109
1110extern __inline __m128i
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1113{
1114 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1115 (__v4si)
1116 _mm_setzero_si128 (),
1117 (__mmask8) __U);
1118}
1119
1120extern __inline __m128i
1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122_mm256_cvttpd_epu32 (__m256d __A)
1123{
1124 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1125 (__v4si)
1126 _mm_setzero_si128 (),
1127 (__mmask8) -1);
1128}
1129
1130extern __inline __m128i
1131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1133{
1134 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1135 (__v4si) __W,
1136 (__mmask8) __U);
1137}
1138
1139extern __inline __m128i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1142{
1143 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1144 (__v4si)
1145 _mm_setzero_si128 (),
1146 (__mmask8) __U);
1147}
1148
1149extern __inline __m128i
1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151_mm_cvttpd_epu32 (__m128d __A)
1152{
1153 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1154 (__v4si)
1155 _mm_setzero_si128 (),
1156 (__mmask8) -1);
1157}
1158
1159extern __inline __m128i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1162{
1163 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1164 (__v4si) __W,
1165 (__mmask8) __U);
1166}
1167
1168extern __inline __m128i
1169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1171{
1172 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1173 (__v4si)
1174 _mm_setzero_si128 (),
1175 (__mmask8) __U);
1176}
1177
1178extern __inline __m128i
1179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1181{
1182 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1183 (__v4si) __W,
1184 (__mmask8) __U);
1185}
1186
1187extern __inline __m128i
1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1190{
1191 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1192 (__v4si)
1193 _mm_setzero_si128 (),
1194 (__mmask8) __U);
1195}
1196
1197extern __inline __m128i
1198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1200{
1201 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1202 (__v4si) __W,
1203 (__mmask8) __U);
1204}
1205
1206extern __inline __m128i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1209{
1210 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1211 (__v4si)
1212 _mm_setzero_si128 (),
1213 (__mmask8) __U);
1214}
1215
1216extern __inline __m256d
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1219{
1220 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1221 (__v4df) __W,
1222 (__mmask8) __U);
1223}
1224
1225extern __inline __m256d
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1228{
1229 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1230 (__v4df)
1231 _mm256_setzero_pd (),
1232 (__mmask8) __U);
1233}
1234
1235extern __inline __m128d
1236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1238{
1239 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1240 (__v2df) __W,
1241 (__mmask8) __U);
1242}
1243
1244extern __inline __m128d
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1247{
1248 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1249 (__v2df)
1250 _mm_setzero_pd (),
1251 (__mmask8) __U);
1252}
1253
1254extern __inline __m256d
1255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256_mm256_cvtepu32_pd (__m128i __A)
1257{
1258 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1259 (__v4df)
1260 _mm256_setzero_pd (),
1261 (__mmask8) -1);
1262}
1263
1264extern __inline __m256d
1265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1267{
1268 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1269 (__v4df) __W,
1270 (__mmask8) __U);
1271}
1272
1273extern __inline __m256d
1274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1276{
1277 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1278 (__v4df)
1279 _mm256_setzero_pd (),
1280 (__mmask8) __U);
1281}
1282
1283extern __inline __m128d
1284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1285_mm_cvtepu32_pd (__m128i __A)
1286{
1287 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1288 (__v2df)
1289 _mm_setzero_pd (),
1290 (__mmask8) -1);
1291}
1292
1293extern __inline __m128d
1294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1296{
1297 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1298 (__v2df) __W,
1299 (__mmask8) __U);
1300}
1301
1302extern __inline __m128d
1303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1305{
1306 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1307 (__v2df)
1308 _mm_setzero_pd (),
1309 (__mmask8) __U);
1310}
1311
1312extern __inline __m256
1313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1315{
1316 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1317 (__v8sf) __W,
1318 (__mmask8) __U);
1319}
1320
1321extern __inline __m256
1322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1323_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
936c0fe4
AI
1324{
1325 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1326 (__v8sf)
1327 _mm256_setzero_ps (),
1328 (__mmask8) __U);
1329}
1330
1331extern __inline __m128
1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1334{
1335 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1336 (__v4sf) __W,
1337 (__mmask8) __U);
1338}
1339
1340extern __inline __m128
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1342_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
936c0fe4
AI
1343{
1344 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1345 (__v4sf)
1346 _mm_setzero_ps (),
1347 (__mmask8) __U);
1348}
1349
1350extern __inline __m256
1351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352_mm256_cvtepu32_ps (__m256i __A)
1353{
1354 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1355 (__v8sf)
1356 _mm256_setzero_ps (),
1357 (__mmask8) -1);
1358}
1359
1360extern __inline __m256
1361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1363{
1364 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1365 (__v8sf) __W,
1366 (__mmask8) __U);
1367}
1368
1369extern __inline __m256
1370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1372{
1373 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1374 (__v8sf)
1375 _mm256_setzero_ps (),
1376 (__mmask8) __U);
1377}
1378
1379extern __inline __m128
1380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381_mm_cvtepu32_ps (__m128i __A)
1382{
1383 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1384 (__v4sf)
1385 _mm_setzero_ps (),
1386 (__mmask8) -1);
1387}
1388
1389extern __inline __m128
1390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1392{
1393 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1394 (__v4sf) __W,
1395 (__mmask8) __U);
1396}
1397
1398extern __inline __m128
1399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1401{
1402 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1403 (__v4sf)
1404 _mm_setzero_ps (),
1405 (__mmask8) __U);
1406}
1407
1408extern __inline __m256d
1409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1411{
1412 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1413 (__v4df) __W,
1414 (__mmask8) __U);
1415}
1416
1417extern __inline __m256d
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1420{
1421 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1422 (__v4df)
1423 _mm256_setzero_pd (),
1424 (__mmask8) __U);
1425}
1426
1427extern __inline __m128d
1428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1430{
1431 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1432 (__v2df) __W,
1433 (__mmask8) __U);
1434}
1435
1436extern __inline __m128d
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1439{
1440 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1441 (__v2df)
1442 _mm_setzero_pd (),
1443 (__mmask8) __U);
1444}
1445
1446extern __inline __m128i
1447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448_mm_cvtepi32_epi8 (__m128i __A)
1449{
1450 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
a25a7887
JJ
1451 (__v16qi)
1452 _mm_undefined_si128 (),
936c0fe4
AI
1453 (__mmask8) -1);
1454}
1455
1456extern __inline void
1457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1459{
1460 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1461}
1462
1463extern __inline __m128i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1466{
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi) __O, __M);
1469}
1470
1471extern __inline __m128i
1472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1474{
1475 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1476 (__v16qi)
1477 _mm_setzero_si128 (),
1478 __M);
1479}
1480
1481extern __inline __m128i
1482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483_mm256_cvtepi32_epi8 (__m256i __A)
1484{
1485 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
a25a7887
JJ
1486 (__v16qi)
1487 _mm_undefined_si128 (),
936c0fe4
AI
1488 (__mmask8) -1);
1489}
1490
1491extern __inline __m128i
1492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1494{
1495 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1496 (__v16qi) __O, __M);
1497}
1498
1499extern __inline void
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1502{
1503 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1504}
1505
1506extern __inline __m128i
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1509{
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi)
1512 _mm_setzero_si128 (),
1513 __M);
1514}
1515
1516extern __inline __m128i
1517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518_mm_cvtsepi32_epi8 (__m128i __A)
1519{
1520 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
a25a7887
JJ
1521 (__v16qi)
1522 _mm_undefined_si128 (),
936c0fe4
AI
1523 (__mmask8) -1);
1524}
1525
1526extern __inline void
1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1529{
1530 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1531}
1532
1533extern __inline __m128i
1534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1536{
1537 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1538 (__v16qi) __O, __M);
1539}
1540
1541extern __inline __m128i
1542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1544{
1545 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1546 (__v16qi)
1547 _mm_setzero_si128 (),
1548 __M);
1549}
1550
1551extern __inline __m128i
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm256_cvtsepi32_epi8 (__m256i __A)
1554{
1555 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
a25a7887
JJ
1556 (__v16qi)
1557 _mm_undefined_si128 (),
936c0fe4
AI
1558 (__mmask8) -1);
1559}
1560
1561extern __inline void
1562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1564{
1565 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1566}
1567
1568extern __inline __m128i
1569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1571{
1572 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1573 (__v16qi) __O, __M);
1574}
1575
1576extern __inline __m128i
1577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1579{
1580 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1581 (__v16qi)
1582 _mm_setzero_si128 (),
1583 __M);
1584}
1585
1586extern __inline __m128i
1587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588_mm_cvtusepi32_epi8 (__m128i __A)
1589{
1590 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
a25a7887
JJ
1591 (__v16qi)
1592 _mm_undefined_si128 (),
936c0fe4
AI
1593 (__mmask8) -1);
1594}
1595
1596extern __inline void
1597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1599{
1600 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1601}
1602
1603extern __inline __m128i
1604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1606{
1607 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1608 (__v16qi) __O,
1609 __M);
1610}
1611
1612extern __inline __m128i
1613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1615{
1616 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1617 (__v16qi)
1618 _mm_setzero_si128 (),
1619 __M);
1620}
1621
1622extern __inline __m128i
1623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624_mm256_cvtusepi32_epi8 (__m256i __A)
1625{
1626 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
a25a7887
JJ
1627 (__v16qi)
1628 _mm_undefined_si128 (),
936c0fe4
AI
1629 (__mmask8) -1);
1630}
1631
1632extern __inline void
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1635{
1636 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1637}
1638
1639extern __inline __m128i
1640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1642{
1643 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1644 (__v16qi) __O,
1645 __M);
1646}
1647
1648extern __inline __m128i
1649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1651{
1652 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1653 (__v16qi)
1654 _mm_setzero_si128 (),
1655 __M);
1656}
1657
1658extern __inline __m128i
1659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660_mm_cvtepi32_epi16 (__m128i __A)
1661{
1662 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
a25a7887
JJ
1663 (__v8hi)
1664 _mm_setzero_si128 (),
936c0fe4
AI
1665 (__mmask8) -1);
1666}
1667
1668extern __inline void
1669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1671{
1672 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1673}
1674
1675extern __inline __m128i
1676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1678{
1679 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1680 (__v8hi) __O, __M);
1681}
1682
1683extern __inline __m128i
1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1686{
1687 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1688 (__v8hi)
1689 _mm_setzero_si128 (),
1690 __M);
1691}
1692
1693extern __inline __m128i
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm256_cvtepi32_epi16 (__m256i __A)
1696{
1697 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
a25a7887
JJ
1698 (__v8hi)
1699 _mm_setzero_si128 (),
936c0fe4
AI
1700 (__mmask8) -1);
1701}
1702
9ab4c07a 1703extern __inline void
936c0fe4
AI
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1706{
1707 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1708}
1709
1710extern __inline __m128i
1711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1713{
1714 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1715 (__v8hi) __O, __M);
1716}
1717
1718extern __inline __m128i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1721{
1722 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1723 (__v8hi)
1724 _mm_setzero_si128 (),
1725 __M);
1726}
1727
1728extern __inline __m128i
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm_cvtsepi32_epi16 (__m128i __A)
1731{
1732 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
a25a7887
JJ
1733 (__v8hi)
1734 _mm_setzero_si128 (),
936c0fe4
AI
1735 (__mmask8) -1);
1736}
1737
1738extern __inline void
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1741{
1742 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1743}
1744
1745extern __inline __m128i
1746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1748{
1749 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1750 (__v8hi)__O,
1751 __M);
1752}
1753
1754extern __inline __m128i
1755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1757{
1758 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 (__v8hi)
1760 _mm_setzero_si128 (),
1761 __M);
1762}
1763
1764extern __inline __m128i
1765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766_mm256_cvtsepi32_epi16 (__m256i __A)
1767{
1768 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
a25a7887
JJ
1769 (__v8hi)
1770 _mm_undefined_si128 (),
936c0fe4
AI
1771 (__mmask8) -1);
1772}
1773
1774extern __inline void
1775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1777{
1778 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1779}
1780
1781extern __inline __m128i
1782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1784{
1785 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1786 (__v8hi) __O, __M);
1787}
1788
1789extern __inline __m128i
1790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1792{
1793 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 (__v8hi)
1795 _mm_setzero_si128 (),
1796 __M);
1797}
1798
1799extern __inline __m128i
1800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801_mm_cvtusepi32_epi16 (__m128i __A)
1802{
1803 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
a25a7887
JJ
1804 (__v8hi)
1805 _mm_undefined_si128 (),
936c0fe4
AI
1806 (__mmask8) -1);
1807}
1808
9ab4c07a 1809extern __inline void
936c0fe4
AI
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1812{
1813 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1814}
1815
1816extern __inline __m128i
1817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1819{
1820 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1821 (__v8hi) __O, __M);
1822}
1823
1824extern __inline __m128i
1825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1827{
1828 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1829 (__v8hi)
1830 _mm_setzero_si128 (),
1831 __M);
1832}
1833
1834extern __inline __m128i
1835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836_mm256_cvtusepi32_epi16 (__m256i __A)
1837{
1838 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
a25a7887
JJ
1839 (__v8hi)
1840 _mm_undefined_si128 (),
936c0fe4
AI
1841 (__mmask8) -1);
1842}
1843
1844extern __inline void
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1847{
1848 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1849}
1850
1851extern __inline __m128i
1852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1854{
1855 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1856 (__v8hi) __O, __M);
1857}
1858
1859extern __inline __m128i
1860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1862{
1863 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1864 (__v8hi)
1865 _mm_setzero_si128 (),
1866 __M);
1867}
1868
1869extern __inline __m128i
1870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1871_mm_cvtepi64_epi8 (__m128i __A)
1872{
1873 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
a25a7887
JJ
1874 (__v16qi)
1875 _mm_undefined_si128 (),
936c0fe4
AI
1876 (__mmask8) -1);
1877}
1878
1879extern __inline void
1880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1882{
1883 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1884}
1885
1886extern __inline __m128i
1887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1889{
1890 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1891 (__v16qi) __O, __M);
1892}
1893
1894extern __inline __m128i
1895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1897{
1898 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1899 (__v16qi)
1900 _mm_setzero_si128 (),
1901 __M);
1902}
1903
1904extern __inline __m128i
1905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906_mm256_cvtepi64_epi8 (__m256i __A)
1907{
1908 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
a25a7887
JJ
1909 (__v16qi)
1910 _mm_undefined_si128 (),
936c0fe4
AI
1911 (__mmask8) -1);
1912}
1913
1914extern __inline void
1915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1917{
1918 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1919}
1920
1921extern __inline __m128i
1922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1924{
1925 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1926 (__v16qi) __O, __M);
1927}
1928
1929extern __inline __m128i
1930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1932{
1933 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1934 (__v16qi)
1935 _mm_setzero_si128 (),
1936 __M);
1937}
1938
1939extern __inline __m128i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm_cvtsepi64_epi8 (__m128i __A)
1942{
1943 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
a25a7887
JJ
1944 (__v16qi)
1945 _mm_undefined_si128 (),
936c0fe4
AI
1946 (__mmask8) -1);
1947}
1948
1949extern __inline void
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1952{
1953 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1954}
1955
1956extern __inline __m128i
1957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1959{
1960 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1961 (__v16qi) __O, __M);
1962}
1963
1964extern __inline __m128i
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1967{
1968 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1969 (__v16qi)
1970 _mm_setzero_si128 (),
1971 __M);
1972}
1973
1974extern __inline __m128i
1975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976_mm256_cvtsepi64_epi8 (__m256i __A)
1977{
1978 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
a25a7887
JJ
1979 (__v16qi)
1980 _mm_undefined_si128 (),
936c0fe4
AI
1981 (__mmask8) -1);
1982}
1983
1984extern __inline void
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1987{
1988 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1989}
1990
1991extern __inline __m128i
1992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1994{
1995 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1996 (__v16qi) __O, __M);
1997}
1998
1999extern __inline __m128i
2000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2002{
2003 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2004 (__v16qi)
2005 _mm_setzero_si128 (),
2006 __M);
2007}
2008
2009extern __inline __m128i
2010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011_mm_cvtusepi64_epi8 (__m128i __A)
2012{
2013 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
a25a7887
JJ
2014 (__v16qi)
2015 _mm_undefined_si128 (),
936c0fe4
AI
2016 (__mmask8) -1);
2017}
2018
2019extern __inline void
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022{
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024}
2025
2026extern __inline __m128i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029{
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2033}
2034
2035extern __inline __m128i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038{
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043}
2044
2045extern __inline __m128i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm256_cvtusepi64_epi8 (__m256i __A)
2048{
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
a25a7887
JJ
2050 (__v16qi)
2051 _mm_undefined_si128 (),
936c0fe4
AI
2052 (__mmask8) -1);
2053}
2054
2055extern __inline void
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2058{
2059 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2060}
2061
2062extern __inline __m128i
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2065{
2066 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 (__v16qi) __O,
2068 __M);
2069}
2070
2071extern __inline __m128i
2072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2074{
2075 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 (__v16qi)
2077 _mm_setzero_si128 (),
2078 __M);
2079}
2080
2081extern __inline __m128i
2082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083_mm_cvtepi64_epi16 (__m128i __A)
2084{
2085 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
a25a7887
JJ
2086 (__v8hi)
2087 _mm_undefined_si128 (),
936c0fe4
AI
2088 (__mmask8) -1);
2089}
2090
2091extern __inline void
2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2094{
2095 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2096}
2097
2098extern __inline __m128i
2099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2101{
2102 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2103 (__v8hi)__O,
2104 __M);
2105}
2106
2107extern __inline __m128i
2108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2110{
2111 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2112 (__v8hi)
2113 _mm_setzero_si128 (),
2114 __M);
2115}
2116
2117extern __inline __m128i
2118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119_mm256_cvtepi64_epi16 (__m256i __A)
2120{
2121 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
a25a7887
JJ
2122 (__v8hi)
2123 _mm_undefined_si128 (),
936c0fe4
AI
2124 (__mmask8) -1);
2125}
2126
2127extern __inline void
2128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2130{
2131 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2132}
2133
2134extern __inline __m128i
2135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2137{
2138 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2139 (__v8hi) __O, __M);
2140}
2141
2142extern __inline __m128i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2145{
2146 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2147 (__v8hi)
2148 _mm_setzero_si128 (),
2149 __M);
2150}
2151
2152extern __inline __m128i
2153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154_mm_cvtsepi64_epi16 (__m128i __A)
2155{
2156 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
a25a7887
JJ
2157 (__v8hi)
2158 _mm_undefined_si128 (),
936c0fe4
AI
2159 (__mmask8) -1);
2160}
2161
2162extern __inline void
2163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2165{
2166 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2167}
2168
2169extern __inline __m128i
2170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2172{
2173 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2174 (__v8hi) __O, __M);
2175}
2176
2177extern __inline __m128i
2178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2180{
2181 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2182 (__v8hi)
2183 _mm_setzero_si128 (),
2184 __M);
2185}
2186
2187extern __inline __m128i
2188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189_mm256_cvtsepi64_epi16 (__m256i __A)
2190{
2191 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
a25a7887
JJ
2192 (__v8hi)
2193 _mm_undefined_si128 (),
936c0fe4
AI
2194 (__mmask8) -1);
2195}
2196
2197extern __inline void
2198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2200{
2201 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2202}
2203
2204extern __inline __m128i
2205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2207{
2208 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2209 (__v8hi) __O, __M);
2210}
2211
2212extern __inline __m128i
2213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2215{
2216 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 (__v8hi)
2218 _mm_setzero_si128 (),
2219 __M);
2220}
2221
2222extern __inline __m128i
2223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224_mm_cvtusepi64_epi16 (__m128i __A)
2225{
2226 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
a25a7887
JJ
2227 (__v8hi)
2228 _mm_undefined_si128 (),
936c0fe4
AI
2229 (__mmask8) -1);
2230}
2231
2232extern __inline void
2233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2235{
2236 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2237}
2238
2239extern __inline __m128i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2242{
2243 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2244 (__v8hi) __O, __M);
2245}
2246
2247extern __inline __m128i
2248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2250{
2251 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2252 (__v8hi)
2253 _mm_setzero_si128 (),
2254 __M);
2255}
2256
2257extern __inline __m128i
2258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2259_mm256_cvtusepi64_epi16 (__m256i __A)
2260{
2261 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
a25a7887
JJ
2262 (__v8hi)
2263 _mm_undefined_si128 (),
936c0fe4
AI
2264 (__mmask8) -1);
2265}
2266
2267extern __inline void
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2270{
2271 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2272}
2273
2274extern __inline __m128i
2275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2277{
2278 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2279 (__v8hi) __O, __M);
2280}
2281
2282extern __inline __m128i
2283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2285{
2286 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2287 (__v8hi)
2288 _mm_setzero_si128 (),
2289 __M);
2290}
2291
2292extern __inline __m128i
2293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294_mm_cvtepi64_epi32 (__m128i __A)
2295{
2296 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
a25a7887
JJ
2297 (__v4si)
2298 _mm_undefined_si128 (),
936c0fe4
AI
2299 (__mmask8) -1);
2300}
2301
2302extern __inline void
2303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2305{
2306 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2307}
2308
2309extern __inline __m128i
2310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2312{
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si) __O, __M);
2315}
2316
2317extern __inline __m128i
2318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2320{
2321 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2322 (__v4si)
2323 _mm_setzero_si128 (),
2324 __M);
2325}
2326
2327extern __inline __m128i
2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329_mm256_cvtepi64_epi32 (__m256i __A)
2330{
2331 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
a25a7887
JJ
2332 (__v4si)
2333 _mm_undefined_si128 (),
936c0fe4
AI
2334 (__mmask8) -1);
2335}
2336
2337extern __inline void
2338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2340{
2341 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2342}
2343
2344extern __inline __m128i
2345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2347{
2348 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 (__v4si) __O, __M);
2350}
2351
2352extern __inline __m128i
2353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2355{
2356 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2357 (__v4si)
2358 _mm_setzero_si128 (),
2359 __M);
2360}
2361
2362extern __inline __m128i
2363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364_mm_cvtsepi64_epi32 (__m128i __A)
2365{
2366 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
a25a7887
JJ
2367 (__v4si)
2368 _mm_undefined_si128 (),
936c0fe4
AI
2369 (__mmask8) -1);
2370}
2371
9ab4c07a 2372extern __inline void
936c0fe4
AI
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2375{
2376 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2377}
2378
2379extern __inline __m128i
2380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2382{
2383 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2384 (__v4si) __O, __M);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2390{
2391 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2392 (__v4si)
2393 _mm_setzero_si128 (),
2394 __M);
2395}
2396
2397extern __inline __m128i
2398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399_mm256_cvtsepi64_epi32 (__m256i __A)
2400{
2401 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
a25a7887
JJ
2402 (__v4si)
2403 _mm_undefined_si128 (),
936c0fe4
AI
2404 (__mmask8) -1);
2405}
2406
2407extern __inline void
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2410{
2411 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2412}
2413
2414extern __inline __m128i
2415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2417{
2418 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2419 (__v4si)__O,
2420 __M);
2421}
2422
2423extern __inline __m128i
2424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2426{
2427 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2428 (__v4si)
2429 _mm_setzero_si128 (),
2430 __M);
2431}
2432
2433extern __inline __m128i
2434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435_mm_cvtusepi64_epi32 (__m128i __A)
2436{
2437 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
a25a7887
JJ
2438 (__v4si)
2439 _mm_undefined_si128 (),
936c0fe4
AI
2440 (__mmask8) -1);
2441}
2442
2443extern __inline void
2444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2446{
2447 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2448}
2449
2450extern __inline __m128i
2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2453{
2454 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 (__v4si) __O, __M);
2456}
2457
2458extern __inline __m128i
2459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2461{
2462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 (__v4si)
2464 _mm_setzero_si128 (),
2465 __M);
2466}
2467
2468extern __inline __m128i
2469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm256_cvtusepi64_epi32 (__m256i __A)
2471{
2472 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
a25a7887
JJ
2473 (__v4si)
2474 _mm_undefined_si128 (),
936c0fe4
AI
2475 (__mmask8) -1);
2476}
2477
2478extern __inline void
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2481{
2482 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2483}
2484
2485extern __inline __m128i
2486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2488{
2489 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2490 (__v4si) __O, __M);
2491}
2492
2493extern __inline __m128i
2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2496{
2497 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2498 (__v4si)
2499 _mm_setzero_si128 (),
2500 __M);
2501}
2502
2503extern __inline __m256
2504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2505_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2506{
2507 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2508 (__v8sf) __O,
2509 __M);
2510}
2511
2512extern __inline __m256
2513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2515{
2516 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2517 (__v8sf)
2518 _mm256_setzero_ps (),
2519 __M);
2520}
2521
2522extern __inline __m128
2523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2525{
2526 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2527 (__v4sf) __O,
2528 __M);
2529}
2530
2531extern __inline __m128
2532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2534{
2535 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2536 (__v4sf)
2537 _mm_setzero_ps (),
2538 __M);
2539}
2540
2541extern __inline __m256d
2542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2544{
2545 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2546 (__v4df) __O,
2547 __M);
2548}
2549
2550extern __inline __m256d
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2553{
2554 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2555 (__v4df)
2556 _mm256_setzero_pd (),
2557 __M);
2558}
2559
2560extern __inline __m256i
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2563{
2564 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2565 (__v8si) __O,
2566 __M);
2567}
2568
2569extern __inline __m256i
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2572{
2573 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2574 (__v8si)
2575 _mm256_setzero_si256 (),
2576 __M);
2577}
2578
2579extern __inline __m256i
2580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2582{
2583 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2584 __M);
2585}
2586
2587extern __inline __m256i
2588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2590{
2591 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2592 (__v8si)
2593 _mm256_setzero_si256 (),
2594 __M);
2595}
2596
2597extern __inline __m128i
2598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2600{
2601 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2602 (__v4si) __O,
2603 __M);
2604}
2605
2606extern __inline __m128i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2609{
2610 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2611 (__v4si)
2612 _mm_setzero_si128 (),
2613 __M);
2614}
2615
2616extern __inline __m128i
2617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2619{
2620 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2621 __M);
2622}
2623
2624extern __inline __m128i
2625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2627{
a25a7887
JJ
2628 return (__m128i)
2629 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2630 (__v4si) _mm_setzero_si128 (),
2631 __M);
936c0fe4
AI
2632}
2633
2634extern __inline __m256i
2635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2637{
2638 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2639 (__v4di) __O,
2640 __M);
2641}
2642
2643extern __inline __m256i
2644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2645_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2646{
2647 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2648 (__v4di)
2649 _mm256_setzero_si256 (),
2650 __M);
2651}
2652
2653extern __inline __m256i
2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2656{
936c0fe4
AI
2657 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 __M);
936c0fe4
AI
2659}
2660
2661extern __inline __m256i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2664{
936c0fe4
AI
2665 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2666 (__v4di)
2667 _mm256_setzero_si256 (),
2668 __M);
936c0fe4
AI
2669}
2670
2671extern __inline __m128i
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2674{
2675 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 (__v2di) __O,
2677 __M);
2678}
2679
2680extern __inline __m128i
2681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2683{
2684 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 (__v2di)
2686 _mm_setzero_si128 (),
2687 __M);
2688}
2689
2690extern __inline __m128i
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2693{
936c0fe4
AI
2694 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2695 __M);
936c0fe4
AI
2696}
2697
2698extern __inline __m128i
2699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2700_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2701{
a25a7887
JJ
2702 return (__m128i)
2703 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2704 (__v2di) _mm_setzero_si128 (),
2705 __M);
936c0fe4
AI
2706}
2707
2708extern __inline __m256
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm256_broadcast_f32x4 (__m128 __A)
2711{
2712 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2713 (__v8sf)_mm256_undefined_pd (),
c42b0bdf 2714 (__mmask8) -1);
936c0fe4
AI
2715}
2716
2717extern __inline __m256
2718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2720{
2721 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2722 (__v8sf) __O,
2723 __M);
2724}
2725
2726extern __inline __m256
2727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2729{
2730 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2731 (__v8sf)
2732 _mm256_setzero_ps (),
2733 __M);
2734}
2735
2736extern __inline __m256i
2737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738_mm256_broadcast_i32x4 (__m128i __A)
2739{
2740 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2741 __A,
2742 (__v8si)_mm256_undefined_si256 (),
c42b0bdf 2743 (__mmask8) -1);
936c0fe4
AI
2744}
2745
2746extern __inline __m256i
2747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2749{
2750 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2751 __A,
2752 (__v8si)
2753 __O, __M);
2754}
2755
2756extern __inline __m256i
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2759{
2760 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2761 __A,
2762 (__v8si)
2763 _mm256_setzero_si256 (),
2764 __M);
2765}
2766
2767extern __inline __m256i
2768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2770{
2771 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2772 (__v8si) __W,
2773 (__mmask8) __U);
2774}
2775
2776extern __inline __m256i
2777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2779{
2780 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2781 (__v8si)
2782 _mm256_setzero_si256 (),
2783 (__mmask8) __U);
2784}
2785
2786extern __inline __m128i
2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2789{
2790 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2791 (__v4si) __W,
2792 (__mmask8) __U);
2793}
2794
2795extern __inline __m128i
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2798{
2799 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2800 (__v4si)
2801 _mm_setzero_si128 (),
2802 (__mmask8) __U);
2803}
2804
2805extern __inline __m256i
2806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2808{
2809 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2810 (__v4di) __W,
2811 (__mmask8) __U);
2812}
2813
2814extern __inline __m256i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2817{
2818 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2819 (__v4di)
2820 _mm256_setzero_si256 (),
2821 (__mmask8) __U);
2822}
2823
2824extern __inline __m128i
2825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2827{
2828 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2829 (__v2di) __W,
2830 (__mmask8) __U);
2831}
2832
2833extern __inline __m128i
2834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2836{
2837 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2838 (__v2di)
2839 _mm_setzero_si128 (),
2840 (__mmask8) __U);
2841}
2842
2843extern __inline __m256i
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2846{
2847 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2848 (__v8si) __W,
2849 (__mmask8) __U);
2850}
2851
2852extern __inline __m256i
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2855{
2856 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2857 (__v8si)
2858 _mm256_setzero_si256 (),
2859 (__mmask8) __U);
2860}
2861
2862extern __inline __m128i
2863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2865{
2866 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2867 (__v4si) __W,
2868 (__mmask8) __U);
2869}
2870
2871extern __inline __m128i
2872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2874{
2875 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2876 (__v4si)
2877 _mm_setzero_si128 (),
2878 (__mmask8) __U);
2879}
2880
2881extern __inline __m256i
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2884{
2885 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2886 (__v4di) __W,
2887 (__mmask8) __U);
2888}
2889
2890extern __inline __m256i
2891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2893{
2894 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2895 (__v4di)
2896 _mm256_setzero_si256 (),
2897 (__mmask8) __U);
2898}
2899
2900extern __inline __m128i
2901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2903{
2904 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2905 (__v2di) __W,
2906 (__mmask8) __U);
2907}
2908
2909extern __inline __m128i
2910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2912{
2913 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2914 (__v2di)
2915 _mm_setzero_si128 (),
2916 (__mmask8) __U);
2917}
2918
2919extern __inline __m256i
2920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2921_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2922{
2923 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2924 (__v4di) __W,
2925 (__mmask8) __U);
2926}
2927
2928extern __inline __m256i
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2931{
2932 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2933 (__v4di)
2934 _mm256_setzero_si256 (),
2935 (__mmask8) __U);
2936}
2937
2938extern __inline __m128i
2939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2941{
2942 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2943 (__v2di) __W,
2944 (__mmask8) __U);
2945}
2946
2947extern __inline __m128i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2950{
2951 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2952 (__v2di)
2953 _mm_setzero_si128 (),
2954 (__mmask8) __U);
2955}
2956
2957extern __inline __m256i
2958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2960{
2961 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2962 (__v8si) __W,
2963 (__mmask8) __U);
2964}
2965
2966extern __inline __m256i
2967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2969{
2970 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2971 (__v8si)
2972 _mm256_setzero_si256 (),
2973 (__mmask8) __U);
2974}
2975
2976extern __inline __m128i
2977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2978_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2979{
2980 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2981 (__v4si) __W,
2982 (__mmask8) __U);
2983}
2984
2985extern __inline __m128i
2986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2988{
2989 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2990 (__v4si)
2991 _mm_setzero_si128 (),
2992 (__mmask8) __U);
2993}
2994
2995extern __inline __m256i
2996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2998{
2999 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3000 (__v4di) __W,
3001 (__mmask8) __U);
3002}
3003
3004extern __inline __m256i
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3007{
3008 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3009 (__v4di)
3010 _mm256_setzero_si256 (),
3011 (__mmask8) __U);
3012}
3013
3014extern __inline __m128i
3015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3017{
3018 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3019 (__v2di) __W,
3020 (__mmask8) __U);
3021}
3022
3023extern __inline __m128i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3026{
3027 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3028 (__v2di)
3029 _mm_setzero_si128 (),
3030 (__mmask8) __U);
3031}
3032
3033extern __inline __m256i
3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3036{
3037 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3038 (__v8si) __W,
3039 (__mmask8) __U);
3040}
3041
3042extern __inline __m256i
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3045{
3046 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3047 (__v8si)
3048 _mm256_setzero_si256 (),
3049 (__mmask8) __U);
3050}
3051
3052extern __inline __m128i
3053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3054_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3055{
3056 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3057 (__v4si) __W,
3058 (__mmask8) __U);
3059}
3060
3061extern __inline __m128i
3062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3064{
3065 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3066 (__v4si)
3067 _mm_setzero_si128 (),
3068 (__mmask8) __U);
3069}
3070
3071extern __inline __m256i
3072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3074{
3075 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3076 (__v4di) __W,
3077 (__mmask8) __U);
3078}
3079
3080extern __inline __m256i
3081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3083{
3084 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3085 (__v4di)
3086 _mm256_setzero_si256 (),
3087 (__mmask8) __U);
3088}
3089
3090extern __inline __m128i
3091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3093{
3094 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3095 (__v2di) __W,
3096 (__mmask8) __U);
3097}
3098
3099extern __inline __m128i
3100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3102{
3103 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3104 (__v2di)
3105 _mm_setzero_si128 (),
3106 (__mmask8) __U);
3107}
3108
3109extern __inline __m256i
3110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3112{
3113 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3114 (__v4di) __W,
3115 (__mmask8) __U);
3116}
3117
3118extern __inline __m256i
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3121{
3122 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3123 (__v4di)
3124 _mm256_setzero_si256 (),
3125 (__mmask8) __U);
3126}
3127
3128extern __inline __m128i
3129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3131{
3132 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3133 (__v2di) __W,
3134 (__mmask8) __U);
3135}
3136
3137extern __inline __m128i
3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3140{
3141 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3142 (__v2di)
3143 _mm_setzero_si128 (),
3144 (__mmask8) __U);
3145}
3146
3147extern __inline __m256d
3148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149_mm256_rcp14_pd (__m256d __A)
3150{
3151 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3152 (__v4df)
3153 _mm256_setzero_pd (),
3154 (__mmask8) -1);
3155}
3156
3157extern __inline __m256d
3158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3160{
3161 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3162 (__v4df) __W,
3163 (__mmask8) __U);
3164}
3165
3166extern __inline __m256d
3167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3169{
3170 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3171 (__v4df)
3172 _mm256_setzero_pd (),
3173 (__mmask8) __U);
3174}
3175
3176extern __inline __m128d
3177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178_mm_rcp14_pd (__m128d __A)
3179{
3180 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3181 (__v2df)
3182 _mm_setzero_pd (),
3183 (__mmask8) -1);
3184}
3185
3186extern __inline __m128d
3187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3189{
3190 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3191 (__v2df) __W,
3192 (__mmask8) __U);
3193}
3194
3195extern __inline __m128d
3196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3198{
3199 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3200 (__v2df)
3201 _mm_setzero_pd (),
3202 (__mmask8) __U);
3203}
3204
3205extern __inline __m256
3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207_mm256_rcp14_ps (__m256 __A)
3208{
3209 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3210 (__v8sf)
3211 _mm256_setzero_ps (),
3212 (__mmask8) -1);
3213}
3214
3215extern __inline __m256
3216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3218{
3219 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3220 (__v8sf) __W,
3221 (__mmask8) __U);
3222}
3223
3224extern __inline __m256
3225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3227{
3228 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3229 (__v8sf)
3230 _mm256_setzero_ps (),
3231 (__mmask8) __U);
3232}
3233
3234extern __inline __m128
3235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3236_mm_rcp14_ps (__m128 __A)
3237{
3238 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3239 (__v4sf)
3240 _mm_setzero_ps (),
3241 (__mmask8) -1);
3242}
3243
3244extern __inline __m128
3245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3247{
3248 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3249 (__v4sf) __W,
3250 (__mmask8) __U);
3251}
3252
3253extern __inline __m128
3254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3256{
3257 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3258 (__v4sf)
3259 _mm_setzero_ps (),
3260 (__mmask8) __U);
3261}
3262
3263extern __inline __m256d
3264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265_mm256_rsqrt14_pd (__m256d __A)
3266{
3267 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3268 (__v4df)
3269 _mm256_setzero_pd (),
3270 (__mmask8) -1);
3271}
3272
3273extern __inline __m256d
3274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3276{
3277 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3278 (__v4df) __W,
3279 (__mmask8) __U);
3280}
3281
3282extern __inline __m256d
3283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3284_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3285{
3286 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3287 (__v4df)
3288 _mm256_setzero_pd (),
3289 (__mmask8) __U);
3290}
3291
3292extern __inline __m128d
3293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3294_mm_rsqrt14_pd (__m128d __A)
3295{
3296 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3297 (__v2df)
3298 _mm_setzero_pd (),
3299 (__mmask8) -1);
3300}
3301
3302extern __inline __m128d
3303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3305{
3306 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3307 (__v2df) __W,
3308 (__mmask8) __U);
3309}
3310
3311extern __inline __m128d
3312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3314{
3315 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3316 (__v2df)
3317 _mm_setzero_pd (),
3318 (__mmask8) __U);
3319}
3320
3321extern __inline __m256
3322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323_mm256_rsqrt14_ps (__m256 __A)
3324{
3325 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3326 (__v8sf)
3327 _mm256_setzero_ps (),
3328 (__mmask8) -1);
3329}
3330
3331extern __inline __m256
3332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3334{
3335 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3336 (__v8sf) __W,
3337 (__mmask8) __U);
3338}
3339
3340extern __inline __m256
3341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3343{
3344 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3345 (__v8sf)
3346 _mm256_setzero_ps (),
3347 (__mmask8) __U);
3348}
3349
3350extern __inline __m128
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm_rsqrt14_ps (__m128 __A)
3353{
3354 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3355 (__v4sf)
3356 _mm_setzero_ps (),
3357 (__mmask8) -1);
3358}
3359
3360extern __inline __m128
3361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3363{
3364 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3365 (__v4sf) __W,
3366 (__mmask8) __U);
3367}
3368
3369extern __inline __m128
3370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3372{
3373 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3374 (__v4sf)
3375 _mm_setzero_ps (),
3376 (__mmask8) __U);
3377}
3378
3379extern __inline __m256d
3380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3382{
3383 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3384 (__v4df) __W,
3385 (__mmask8) __U);
3386}
3387
3388extern __inline __m256d
3389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3391{
3392 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3393 (__v4df)
3394 _mm256_setzero_pd (),
3395 (__mmask8) __U);
3396}
3397
3398extern __inline __m128d
3399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3401{
3402 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3403 (__v2df) __W,
3404 (__mmask8) __U);
3405}
3406
3407extern __inline __m128d
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3410{
3411 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3412 (__v2df)
3413 _mm_setzero_pd (),
3414 (__mmask8) __U);
3415}
3416
3417extern __inline __m256
3418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3419_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3420{
3421 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3422 (__v8sf) __W,
3423 (__mmask8) __U);
3424}
3425
3426extern __inline __m256
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3429{
3430 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3431 (__v8sf)
3432 _mm256_setzero_ps (),
3433 (__mmask8) __U);
3434}
3435
3436extern __inline __m128
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3439{
3440 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3441 (__v4sf) __W,
3442 (__mmask8) __U);
3443}
3444
3445extern __inline __m128
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3448{
3449 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3450 (__v4sf)
3451 _mm_setzero_ps (),
3452 (__mmask8) __U);
3453}
3454
3455extern __inline __m256i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3458 __m256i __B)
3459{
3460 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3461 (__v8si) __B,
3462 (__v8si) __W,
3463 (__mmask8) __U);
3464}
3465
3466extern __inline __m256i
3467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3469{
3470 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3471 (__v8si) __B,
3472 (__v8si)
3473 _mm256_setzero_si256 (),
3474 (__mmask8) __U);
3475}
3476
3477extern __inline __m256i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3480 __m256i __B)
3481{
3482 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3483 (__v4di) __B,
3484 (__v4di) __W,
3485 (__mmask8) __U);
3486}
3487
3488extern __inline __m256i
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3491{
3492 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3493 (__v4di) __B,
3494 (__v4di)
3495 _mm256_setzero_si256 (),
3496 (__mmask8) __U);
3497}
3498
3499extern __inline __m256i
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3502 __m256i __B)
3503{
3504 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3505 (__v8si) __B,
3506 (__v8si) __W,
3507 (__mmask8) __U);
3508}
3509
3510extern __inline __m256i
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3513{
3514 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3515 (__v8si) __B,
3516 (__v8si)
3517 _mm256_setzero_si256 (),
3518 (__mmask8) __U);
3519}
3520
3521extern __inline __m256i
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3524 __m256i __B)
3525{
3526 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3527 (__v4di) __B,
3528 (__v4di) __W,
3529 (__mmask8) __U);
3530}
3531
3532extern __inline __m256i
3533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3535{
3536 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3537 (__v4di) __B,
3538 (__v4di)
3539 _mm256_setzero_si256 (),
3540 (__mmask8) __U);
3541}
3542
3543extern __inline __m128i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3546 __m128i __B)
3547{
3548 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3549 (__v4si) __B,
3550 (__v4si) __W,
3551 (__mmask8) __U);
3552}
3553
3554extern __inline __m128i
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3557{
3558 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3559 (__v4si) __B,
3560 (__v4si)
3561 _mm_setzero_si128 (),
3562 (__mmask8) __U);
3563}
3564
3565extern __inline __m128i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3568 __m128i __B)
3569{
3570 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3571 (__v2di) __B,
3572 (__v2di) __W,
3573 (__mmask8) __U);
3574}
3575
3576extern __inline __m128i
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3579{
3580 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3581 (__v2di) __B,
3582 (__v2di)
3583 _mm_setzero_si128 (),
3584 (__mmask8) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3590 __m128i __B)
3591{
3592 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3593 (__v4si) __B,
3594 (__v4si) __W,
3595 (__mmask8) __U);
3596}
3597
3598extern __inline __m128i
3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3601{
3602 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3603 (__v4si) __B,
3604 (__v4si)
3605 _mm_setzero_si128 (),
3606 (__mmask8) __U);
3607}
3608
3609extern __inline __m128i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3612 __m128i __B)
3613{
3614 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3615 (__v2di) __B,
3616 (__v2di) __W,
3617 (__mmask8) __U);
3618}
3619
3620extern __inline __m128i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3623{
3624 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3625 (__v2di) __B,
3626 (__v2di)
3627 _mm_setzero_si128 (),
3628 (__mmask8) __U);
3629}
3630
3631extern __inline __m256
3632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3633_mm256_getexp_ps (__m256 __A)
3634{
3635 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3636 (__v8sf)
3637 _mm256_setzero_ps (),
3638 (__mmask8) -1);
3639}
3640
3641extern __inline __m256
3642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3644{
3645 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3646 (__v8sf) __W,
3647 (__mmask8) __U);
3648}
3649
3650extern __inline __m256
3651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3653{
3654 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3655 (__v8sf)
3656 _mm256_setzero_ps (),
3657 (__mmask8) __U);
3658}
3659
3660extern __inline __m256d
3661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm256_getexp_pd (__m256d __A)
3663{
3664 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3665 (__v4df)
3666 _mm256_setzero_pd (),
3667 (__mmask8) -1);
3668}
3669
3670extern __inline __m256d
3671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3673{
3674 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3675 (__v4df) __W,
3676 (__mmask8) __U);
3677}
3678
3679extern __inline __m256d
3680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3682{
3683 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3684 (__v4df)
3685 _mm256_setzero_pd (),
3686 (__mmask8) __U);
3687}
3688
3689extern __inline __m128
3690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3691_mm_getexp_ps (__m128 __A)
3692{
3693 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3694 (__v4sf)
3695 _mm_setzero_ps (),
3696 (__mmask8) -1);
3697}
3698
3699extern __inline __m128
3700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3702{
3703 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3704 (__v4sf) __W,
3705 (__mmask8) __U);
3706}
3707
3708extern __inline __m128
3709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3711{
3712 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3713 (__v4sf)
3714 _mm_setzero_ps (),
3715 (__mmask8) __U);
3716}
3717
3718extern __inline __m128d
3719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720_mm_getexp_pd (__m128d __A)
3721{
3722 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3723 (__v2df)
3724 _mm_setzero_pd (),
3725 (__mmask8) -1);
3726}
3727
3728extern __inline __m128d
3729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3730_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3731{
3732 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3733 (__v2df) __W,
3734 (__mmask8) __U);
3735}
3736
3737extern __inline __m128d
3738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3740{
3741 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3742 (__v2df)
3743 _mm_setzero_pd (),
3744 (__mmask8) __U);
3745}
3746
3747extern __inline __m256i
3748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3750 __m128i __B)
3751{
3752 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3753 (__v4si) __B,
3754 (__v8si) __W,
3755 (__mmask8) __U);
3756}
3757
3758extern __inline __m256i
3759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3761{
3762 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3763 (__v4si) __B,
3764 (__v8si)
3765 _mm256_setzero_si256 (),
3766 (__mmask8) __U);
3767}
3768
3769extern __inline __m128i
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3772 __m128i __B)
3773{
3774 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3775 (__v4si) __B,
3776 (__v4si) __W,
3777 (__mmask8) __U);
3778}
3779
3780extern __inline __m128i
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3783{
3784 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3785 (__v4si) __B,
3786 (__v4si)
3787 _mm_setzero_si128 (),
3788 (__mmask8) __U);
3789}
3790
3791extern __inline __m256i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3794 __m128i __B)
3795{
3796 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3797 (__v2di) __B,
3798 (__v4di) __W,
3799 (__mmask8) __U);
3800}
3801
3802extern __inline __m256i
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3805{
3806 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3807 (__v2di) __B,
3808 (__v4di)
3809 _mm256_setzero_si256 (),
3810 (__mmask8) __U);
3811}
3812
3813extern __inline __m128i
3814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3816 __m128i __B)
3817{
3818 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3819 (__v2di) __B,
3820 (__v2di) __W,
3821 (__mmask8) __U);
3822}
3823
3824extern __inline __m128i
3825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3827{
3828 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3829 (__v2di) __B,
3830 (__v2di)
a25a7887 3831 _mm_setzero_si128 (),
936c0fe4
AI
3832 (__mmask8) __U);
3833}
3834
3835extern __inline __m256i
3836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3838 __m256i __B)
3839{
3840 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3841 (__v8si) __B,
3842 (__v8si) __W,
3843 (__mmask8) __U);
3844}
3845
3846extern __inline __m256i
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3849{
3850 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3851 (__v8si) __B,
3852 (__v8si)
3853 _mm256_setzero_si256 (),
3854 (__mmask8) __U);
3855}
3856
3857extern __inline __m256d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_scalef_pd (__m256d __A, __m256d __B)
3860{
3861 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3862 (__v4df) __B,
3863 (__v4df)
3864 _mm256_setzero_pd (),
3865 (__mmask8) -1);
3866}
3867
3868extern __inline __m256d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3871 __m256d __B)
3872{
3873 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3874 (__v4df) __B,
3875 (__v4df) __W,
3876 (__mmask8) __U);
3877}
3878
3879extern __inline __m256d
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3882{
3883 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3884 (__v4df) __B,
3885 (__v4df)
3886 _mm256_setzero_pd (),
3887 (__mmask8) __U);
3888}
3889
3890extern __inline __m256
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm256_scalef_ps (__m256 __A, __m256 __B)
3893{
3894 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3895 (__v8sf) __B,
3896 (__v8sf)
3897 _mm256_setzero_ps (),
3898 (__mmask8) -1);
3899}
3900
3901extern __inline __m256
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3904 __m256 __B)
3905{
3906 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3907 (__v8sf) __B,
3908 (__v8sf) __W,
3909 (__mmask8) __U);
3910}
3911
3912extern __inline __m256
3913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3915{
3916 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3917 (__v8sf) __B,
3918 (__v8sf)
3919 _mm256_setzero_ps (),
3920 (__mmask8) __U);
3921}
3922
3923extern __inline __m128d
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm_scalef_pd (__m128d __A, __m128d __B)
3926{
3927 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3928 (__v2df) __B,
3929 (__v2df)
3930 _mm_setzero_pd (),
3931 (__mmask8) -1);
3932}
3933
3934extern __inline __m128d
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3937 __m128d __B)
3938{
3939 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3940 (__v2df) __B,
3941 (__v2df) __W,
3942 (__mmask8) __U);
3943}
3944
3945extern __inline __m128d
3946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3948{
3949 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3950 (__v2df) __B,
3951 (__v2df)
3952 _mm_setzero_pd (),
3953 (__mmask8) __U);
3954}
3955
3956extern __inline __m128
3957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958_mm_scalef_ps (__m128 __A, __m128 __B)
3959{
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf)
3963 _mm_setzero_ps (),
3964 (__mmask8) -1);
3965}
3966
3967extern __inline __m128
3968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3969_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3970{
3971 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3972 (__v4sf) __B,
3973 (__v4sf) __W,
3974 (__mmask8) __U);
3975}
3976
3977extern __inline __m128
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3980{
3981 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3982 (__v4sf) __B,
3983 (__v4sf)
3984 _mm_setzero_ps (),
3985 (__mmask8) __U);
3986}
3987
3988extern __inline __m256d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3991 __m256d __C)
3992{
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3997}
3998
3999extern __inline __m256d
4000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4002 __mmask8 __U)
4003{
4004 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4008}
4009
4010extern __inline __m256d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4013 __m256d __C)
4014{
4015 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4016 (__v4df) __B,
4017 (__v4df) __C,
4018 (__mmask8) __U);
4019}
4020
4021extern __inline __m128d
4022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4024{
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4029}
4030
4031extern __inline __m128d
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4034 __mmask8 __U)
4035{
4036 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4040}
4041
4042extern __inline __m128d
4043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4045 __m128d __C)
4046{
4047 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4048 (__v2df) __B,
4049 (__v2df) __C,
4050 (__mmask8) __U);
4051}
4052
4053extern __inline __m256
4054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4056{
4057 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4061}
4062
4063extern __inline __m256
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4066 __mmask8 __U)
4067{
4068 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4072}
4073
4074extern __inline __m256
4075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4077 __m256 __C)
4078{
4079 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4080 (__v8sf) __B,
4081 (__v8sf) __C,
4082 (__mmask8) __U);
4083}
4084
4085extern __inline __m128
4086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4087_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4088{
4089 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4090 (__v4sf) __B,
4091 (__v4sf) __C,
4092 (__mmask8) __U);
4093}
4094
4095extern __inline __m128
4096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4098{
4099 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4100 (__v4sf) __B,
4101 (__v4sf) __C,
4102 (__mmask8) __U);
4103}
4104
4105extern __inline __m128
4106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4108{
4109 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4110 (__v4sf) __B,
4111 (__v4sf) __C,
4112 (__mmask8) __U);
4113}
4114
4115extern __inline __m256d
4116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4118 __m256d __C)
4119{
4120 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4121 (__v4df) __B,
4122 -(__v4df) __C,
4123 (__mmask8) __U);
4124}
4125
4126extern __inline __m256d
4127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4129 __mmask8 __U)
4130{
4131 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4132 (__v4df) __B,
4133 (__v4df) __C,
4134 (__mmask8) __U);
4135}
4136
4137extern __inline __m256d
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4140 __m256d __C)
4141{
4142 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4143 (__v4df) __B,
4144 -(__v4df) __C,
4145 (__mmask8) __U);
4146}
4147
4148extern __inline __m128d
4149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4150_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4151{
4152 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4153 (__v2df) __B,
4154 -(__v2df) __C,
4155 (__mmask8) __U);
4156}
4157
4158extern __inline __m128d
4159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4161 __mmask8 __U)
4162{
4163 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4164 (__v2df) __B,
4165 (__v2df) __C,
4166 (__mmask8) __U);
4167}
4168
4169extern __inline __m128d
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4172 __m128d __C)
4173{
4174 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4175 (__v2df) __B,
4176 -(__v2df) __C,
4177 (__mmask8) __U);
4178}
4179
4180extern __inline __m256
4181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4182_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4183{
4184 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4185 (__v8sf) __B,
4186 -(__v8sf) __C,
4187 (__mmask8) __U);
4188}
4189
4190extern __inline __m256
4191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4193 __mmask8 __U)
4194{
4195 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4196 (__v8sf) __B,
4197 (__v8sf) __C,
4198 (__mmask8) __U);
4199}
4200
4201extern __inline __m256
4202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4204 __m256 __C)
4205{
4206 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4207 (__v8sf) __B,
4208 -(__v8sf) __C,
4209 (__mmask8) __U);
4210}
4211
4212extern __inline __m128
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4215{
4216 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4217 (__v4sf) __B,
4218 -(__v4sf) __C,
4219 (__mmask8) __U);
4220}
4221
4222extern __inline __m128
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4225{
4226 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4227 (__v4sf) __B,
4228 (__v4sf) __C,
4229 (__mmask8) __U);
4230}
4231
4232extern __inline __m128
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4235{
4236 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4237 (__v4sf) __B,
4238 -(__v4sf) __C,
4239 (__mmask8) __U);
4240}
4241
4242extern __inline __m256d
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4245 __m256d __C)
4246{
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8) __U);
4251}
4252
4253extern __inline __m256d
4254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4255_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4256 __mmask8 __U)
4257{
4258 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4259 (__v4df) __B,
4260 (__v4df) __C,
4261 (__mmask8)
4262 __U);
4263}
4264
4265extern __inline __m256d
4266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4268 __m256d __C)
4269{
4270 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4271 (__v4df) __B,
4272 (__v4df) __C,
4273 (__mmask8)
4274 __U);
4275}
4276
4277extern __inline __m128d
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4280 __m128d __C)
4281{
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8) __U);
4286}
4287
4288extern __inline __m128d
4289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4290_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4291 __mmask8 __U)
4292{
4293 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4294 (__v2df) __B,
4295 (__v2df) __C,
4296 (__mmask8)
4297 __U);
4298}
4299
4300extern __inline __m128d
4301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4302_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4303 __m128d __C)
4304{
4305 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4306 (__v2df) __B,
4307 (__v2df) __C,
4308 (__mmask8)
4309 __U);
4310}
4311
4312extern __inline __m256
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4315 __m256 __C)
4316{
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4321}
4322
4323extern __inline __m256
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4326 __mmask8 __U)
4327{
4328 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4332}
4333
4334extern __inline __m256
4335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4337 __m256 __C)
4338{
4339 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4340 (__v8sf) __B,
4341 (__v8sf) __C,
4342 (__mmask8) __U);
4343}
4344
4345extern __inline __m128
4346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4347_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4348{
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4353}
4354
4355extern __inline __m128
4356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4358 __mmask8 __U)
4359{
4360 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4364}
4365
4366extern __inline __m128
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4369 __m128 __C)
4370{
4371 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4372 (__v4sf) __B,
4373 (__v4sf) __C,
4374 (__mmask8) __U);
4375}
4376
4377extern __inline __m256d
4378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4380 __m256d __C)
4381{
4382 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4383 (__v4df) __B,
4384 -(__v4df) __C,
4385 (__mmask8) __U);
4386}
4387
4388extern __inline __m256d
4389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4391 __mmask8 __U)
4392{
4393 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4394 (__v4df) __B,
4395 (__v4df) __C,
4396 (__mmask8)
4397 __U);
4398}
4399
4400extern __inline __m256d
4401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4403 __m256d __C)
4404{
4405 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4406 (__v4df) __B,
4407 -(__v4df) __C,
4408 (__mmask8)
4409 __U);
4410}
4411
4412extern __inline __m128d
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4415 __m128d __C)
4416{
4417 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4418 (__v2df) __B,
4419 -(__v2df) __C,
4420 (__mmask8) __U);
4421}
4422
4423extern __inline __m128d
4424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4426 __mmask8 __U)
4427{
4428 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4429 (__v2df) __B,
4430 (__v2df) __C,
4431 (__mmask8)
4432 __U);
4433}
4434
4435extern __inline __m128d
4436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4437_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4438 __m128d __C)
4439{
4440 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4441 (__v2df) __B,
4442 -(__v2df) __C,
4443 (__mmask8)
4444 __U);
4445}
4446
4447extern __inline __m256
4448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4450 __m256 __C)
4451{
4452 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4453 (__v8sf) __B,
4454 -(__v8sf) __C,
4455 (__mmask8) __U);
4456}
4457
4458extern __inline __m256
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4461 __mmask8 __U)
4462{
4463 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4464 (__v8sf) __B,
4465 (__v8sf) __C,
4466 (__mmask8) __U);
4467}
4468
4469extern __inline __m256
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4472 __m256 __C)
4473{
4474 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4475 (__v8sf) __B,
4476 -(__v8sf) __C,
4477 (__mmask8) __U);
4478}
4479
4480extern __inline __m128
4481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4483{
4484 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4485 (__v4sf) __B,
4486 -(__v4sf) __C,
4487 (__mmask8) __U);
4488}
4489
4490extern __inline __m128
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4493 __mmask8 __U)
4494{
4495 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4496 (__v4sf) __B,
4497 (__v4sf) __C,
4498 (__mmask8) __U);
4499}
4500
4501extern __inline __m128
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4504 __m128 __C)
4505{
4506 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4507 (__v4sf) __B,
4508 -(__v4sf) __C,
4509 (__mmask8) __U);
4510}
4511
4512extern __inline __m256d
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4515 __m256d __C)
4516{
4517 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4521}
4522
4523extern __inline __m256d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4526 __mmask8 __U)
4527{
4528 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4532}
4533
4534extern __inline __m256d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4537 __m256d __C)
4538{
4539 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4540 (__v4df) __B,
4541 (__v4df) __C,
4542 (__mmask8) __U);
4543}
4544
4545extern __inline __m128d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4548 __m128d __C)
4549{
4550 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4554}
4555
4556extern __inline __m128d
4557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4559 __mmask8 __U)
4560{
4561 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4565}
4566
4567extern __inline __m128d
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4570 __m128d __C)
4571{
4572 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4573 (__v2df) __B,
4574 (__v2df) __C,
4575 (__mmask8) __U);
4576}
4577
4578extern __inline __m256
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4581 __m256 __C)
4582{
4583 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4587}
4588
4589extern __inline __m256
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4592 __mmask8 __U)
4593{
4594 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4598}
4599
4600extern __inline __m256
4601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4603 __m256 __C)
4604{
4605 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4606 (__v8sf) __B,
4607 (__v8sf) __C,
4608 (__mmask8) __U);
4609}
4610
4611extern __inline __m128
4612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4613_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4614{
4615 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4616 (__v4sf) __B,
4617 (__v4sf) __C,
4618 (__mmask8) __U);
4619}
4620
4621extern __inline __m128
4622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4624{
4625 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4626 (__v4sf) __B,
4627 (__v4sf) __C,
4628 (__mmask8) __U);
4629}
4630
4631extern __inline __m128
4632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4634{
4635 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4636 (__v4sf) __B,
4637 (__v4sf) __C,
4638 (__mmask8) __U);
4639}
4640
4641extern __inline __m256d
4642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4644 __m256d __C)
4645{
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650}
4651
4652extern __inline __m256d
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4655 __mmask8 __U)
4656{
4657 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4658 (__v4df) __B,
4659 (__v4df) __C,
4660 (__mmask8) __U);
4661}
4662
4663extern __inline __m256d
4664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4666 __m256d __C)
4667{
4668 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4669 (__v4df) __B,
4670 -(__v4df) __C,
4671 (__mmask8) __U);
4672}
4673
4674extern __inline __m128d
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4677 __m128d __C)
4678{
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683}
4684
4685extern __inline __m128d
4686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4688 __mmask8 __U)
4689{
4690 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4691 (__v2df) __B,
4692 (__v2df) __C,
4693 (__mmask8) __U);
4694}
4695
4696extern __inline __m128d
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4699 __m128d __C)
4700{
4701 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4702 (__v2df) __B,
4703 -(__v2df) __C,
4704 (__mmask8) __U);
4705}
4706
4707extern __inline __m256
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4710 __m256 __C)
4711{
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716}
4717
4718extern __inline __m256
4719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4721 __mmask8 __U)
4722{
4723 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4724 (__v8sf) __B,
4725 (__v8sf) __C,
4726 (__mmask8) __U);
4727}
4728
4729extern __inline __m256
4730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4732 __m256 __C)
4733{
4734 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4735 (__v8sf) __B,
4736 -(__v8sf) __C,
4737 (__mmask8) __U);
4738}
4739
4740extern __inline __m128
4741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4743{
4744 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4745 (__v4sf) __B,
4746 (__v4sf) __C,
4747 (__mmask8) __U);
4748}
4749
4750extern __inline __m128
4751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4753{
4754 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4755 (__v4sf) __B,
4756 (__v4sf) __C,
4757 (__mmask8) __U);
4758}
4759
4760extern __inline __m128
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4763{
4764 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4765 (__v4sf) __B,
4766 -(__v4sf) __C,
4767 (__mmask8) __U);
4768}
4769
4770extern __inline __m128i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4773 __m128i __B)
4774{
4775 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4776 (__v4si) __B,
4777 (__v4si) __W,
4778 (__mmask8) __U);
4779}
4780
4781extern __inline __m128i
4782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4784{
4785 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4786 (__v4si) __B,
4787 (__v4si)
4788 _mm_setzero_si128 (),
4789 (__mmask8) __U);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4795 __m256i __B)
4796{
4797 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4798 (__v8si) __B,
4799 (__v8si) __W,
4800 (__mmask8) __U);
4801}
4802
4803extern __inline __m256i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4806{
4807 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4808 (__v8si) __B,
4809 (__v8si)
4810 _mm256_setzero_si256 (),
4811 (__mmask8) __U);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4817 __m128i __B)
4818{
4819 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4820 (__v4si) __B,
4821 (__v4si) __W,
4822 (__mmask8) __U);
4823}
4824
4825extern __inline __m128i
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4828{
4829 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4830 (__v4si) __B,
4831 (__v4si)
4832 _mm_setzero_si128 (),
4833 (__mmask8) __U);
4834}
4835
4836extern __inline __m256i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4839 __m256i __B)
4840{
4841 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4842 (__v8si) __B,
4843 (__v8si) __W,
4844 (__mmask8) __U);
4845}
4846
4847extern __inline __m256i
4848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4850{
4851 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4852 (__v8si) __B,
4853 (__v8si)
4854 _mm256_setzero_si256 (),
4855 (__mmask8) __U);
4856}
4857
4858extern __inline __m128i
4859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4860_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4861{
4862 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4863 (__v4si) __B,
4864 (__v4si) __W,
4865 (__mmask8) __U);
4866}
4867
4868extern __inline __m128i
4869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4871{
4872 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4873 (__v4si) __B,
4874 (__v4si)
4875 _mm_setzero_si128 (),
4876 (__mmask8) __U);
4877}
4878
4879extern __inline __m256i
4880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4882 __m256i __B)
4883{
4884 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4885 (__v8si) __B,
4886 (__v8si) __W,
4887 (__mmask8) __U);
4888}
4889
4890extern __inline __m256i
4891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4892_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4893{
4894 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4895 (__v8si) __B,
4896 (__v8si)
4897 _mm256_setzero_si256 (),
4898 (__mmask8) __U);
4899}
4900
4901extern __inline __m128i
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4904 __m128i __B)
4905{
4906 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4907 (__v4si) __B,
4908 (__v4si) __W,
4909 (__mmask8) __U);
4910}
4911
4912extern __inline __m128i
4913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4915{
4916 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4917 (__v4si) __B,
4918 (__v4si)
4919 _mm_setzero_si128 (),
4920 (__mmask8) __U);
4921}
4922
4923extern __inline __m128
4924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4925_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4926{
4927 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4928 (__v4sf) __W,
4929 (__mmask8) __U);
4930}
4931
4932extern __inline __m128
4933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4935{
4936 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4937 (__v4sf)
4938 _mm_setzero_ps (),
4939 (__mmask8) __U);
4940}
4941
4942extern __inline __m128
4943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4945{
4946 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4947 (__v4sf) __W,
4948 (__mmask8) __U);
4949}
4950
4951extern __inline __m128
4952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4954{
4955 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4956 (__v4sf)
4957 _mm_setzero_ps (),
4958 (__mmask8) __U);
4959}
4960
4961extern __inline __m256i
4962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4964{
4965 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4966 (__v8si) __W,
4967 (__mmask8) __U);
4968}
4969
4970extern __inline __m256i
4971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4972_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4973{
4974 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4975 (__v8si)
4976 _mm256_setzero_si256 (),
4977 (__mmask8) __U);
4978}
4979
4980extern __inline __m128i
4981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4982_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4983{
4984 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4985 (__v4si) __W,
4986 (__mmask8) __U);
4987}
4988
4989extern __inline __m128i
4990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4991_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4992{
4993 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4994 (__v4si)
4995 _mm_setzero_si128 (),
4996 (__mmask8) __U);
4997}
4998
4999extern __inline __m256i
5000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5001_mm256_cvtps_epu32 (__m256 __A)
5002{
5003 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5004 (__v8si)
5005 _mm256_setzero_si256 (),
5006 (__mmask8) -1);
5007}
5008
5009extern __inline __m256i
5010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5012{
5013 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5014 (__v8si) __W,
5015 (__mmask8) __U);
5016}
5017
5018extern __inline __m256i
5019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5021{
5022 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5023 (__v8si)
5024 _mm256_setzero_si256 (),
5025 (__mmask8) __U);
5026}
5027
5028extern __inline __m128i
5029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5030_mm_cvtps_epu32 (__m128 __A)
5031{
5032 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5033 (__v4si)
5034 _mm_setzero_si128 (),
5035 (__mmask8) -1);
5036}
5037
5038extern __inline __m128i
5039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5040_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5041{
5042 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5043 (__v4si) __W,
5044 (__mmask8) __U);
5045}
5046
5047extern __inline __m128i
5048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5049_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5050{
5051 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5052 (__v4si)
5053 _mm_setzero_si128 (),
5054 (__mmask8) __U);
5055}
5056
5057extern __inline __m256d
5058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5060{
5061 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5062 (__v4df) __W,
5063 (__mmask8) __U);
5064}
5065
5066extern __inline __m256d
5067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5068_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5069{
5070 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5071 (__v4df)
5072 _mm256_setzero_pd (),
5073 (__mmask8) __U);
5074}
5075
5076extern __inline __m128d
5077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5079{
5080 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5081 (__v2df) __W,
5082 (__mmask8) __U);
5083}
5084
5085extern __inline __m128d
5086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5087_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5088{
5089 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5090 (__v2df)
5091 _mm_setzero_pd (),
5092 (__mmask8) __U);
5093}
5094
5095extern __inline __m256
5096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5098{
5099 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5100 (__v8sf) __W,
5101 (__mmask8) __U);
5102}
5103
5104extern __inline __m256
5105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5106_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5107{
5108 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5109 (__v8sf)
5110 _mm256_setzero_ps (),
5111 (__mmask8) __U);
5112}
5113
5114extern __inline __m128
5115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5116_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5117{
5118 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5119 (__v4sf) __W,
5120 (__mmask8) __U);
5121}
5122
5123extern __inline __m128
5124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5126{
5127 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5128 (__v4sf)
5129 _mm_setzero_ps (),
5130 (__mmask8) __U);
5131}
5132
5133extern __inline __m256
5134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5135_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5136{
5137 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5138 (__v8sf) __W,
5139 (__mmask8) __U);
5140}
5141
5142extern __inline __m256
5143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5144_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5145{
5146 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5147 (__v8sf)
5148 _mm256_setzero_ps (),
5149 (__mmask8) __U);
5150}
5151
5152extern __inline __m128
5153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5155{
5156 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5157 (__v4sf) __W,
5158 (__mmask8) __U);
5159}
5160
5161extern __inline __m128
5162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5163_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5164{
5165 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5166 (__v4sf)
5167 _mm_setzero_ps (),
5168 (__mmask8) __U);
5169}
5170
5171extern __inline __m128i
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5174 __m128i __B)
5175{
5176 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5177 (__v4si) __B,
5178 (__v4si) __W,
5179 (__mmask8) __U);
5180}
5181
5182extern __inline __m128i
5183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5185{
5186 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5187 (__v4si) __B,
5188 (__v4si)
5189 _mm_setzero_si128 (),
5190 (__mmask8) __U);
5191}
5192
5193extern __inline __m256i
5194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5196 __m256i __B)
5197{
5198 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5199 (__v8si) __B,
5200 (__v8si) __W,
5201 (__mmask8) __U);
5202}
5203
5204extern __inline __m256i
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5207{
5208 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5209 (__v8si) __B,
5210 (__v8si)
5211 _mm256_setzero_si256 (),
5212 (__mmask8) __U);
5213}
5214
5215extern __inline __m128i
5216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5218 __m128i __B)
5219{
5220 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5221 (__v2di) __B,
5222 (__v2di) __W,
5223 (__mmask8) __U);
5224}
5225
5226extern __inline __m128i
5227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5229{
5230 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5231 (__v2di) __B,
5232 (__v2di)
a25a7887 5233 _mm_setzero_si128 (),
936c0fe4
AI
5234 (__mmask8) __U);
5235}
5236
5237extern __inline __m256i
5238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5240 __m256i __B)
5241{
5242 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5243 (__v4di) __B,
5244 (__v4di) __W,
5245 (__mmask8) __U);
5246}
5247
5248extern __inline __m256i
5249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5251{
5252 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5253 (__v4di) __B,
5254 (__v4di)
5255 _mm256_setzero_si256 (),
5256 (__mmask8) __U);
5257}
5258
5259extern __inline __m128i
5260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5261_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5262 __m128i __B)
5263{
5264 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5265 (__v4si) __B,
5266 (__v4si) __W,
5267 (__mmask8) __U);
5268}
5269
5270extern __inline __m128i
5271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5272_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5273{
5274 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5275 (__v4si) __B,
5276 (__v4si)
5277 _mm_setzero_si128 (),
5278 (__mmask8) __U);
5279}
5280
5281extern __inline __m256i
5282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5284 __m256i __B)
5285{
5286 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5287 (__v8si) __B,
5288 (__v8si) __W,
5289 (__mmask8) __U);
5290}
5291
5292extern __inline __m256i
5293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5295{
5296 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5297 (__v8si) __B,
5298 (__v8si)
5299 _mm256_setzero_si256 (),
5300 (__mmask8) __U);
5301}
5302
5303extern __inline __m128i
5304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5306 __m128i __B)
5307{
5308 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5309 (__v2di) __B,
5310 (__v2di) __W,
5311 (__mmask8) __U);
5312}
5313
5314extern __inline __m128i
5315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5316_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5317{
5318 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5319 (__v2di) __B,
5320 (__v2di)
a25a7887 5321 _mm_setzero_si128 (),
936c0fe4
AI
5322 (__mmask8) __U);
5323}
5324
5325extern __inline __m256i
5326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5328 __m256i __B)
5329{
5330 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5331 (__v4di) __B,
5332 (__v4di) __W,
5333 (__mmask8) __U);
5334}
5335
5336extern __inline __m256i
5337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5339{
5340 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5341 (__v4di) __B,
5342 (__v4di)
5343 _mm256_setzero_si256 (),
5344 (__mmask8) __U);
5345}
5346
eee5d6f5
AI
5347extern __inline __mmask8
5348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5350{
5351 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5352 (__v4si) __B, 0,
5353 (__mmask8) -1);
5354}
5355
936c0fe4
AI
5356extern __inline __mmask8
5357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5359{
5360 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5361 (__v4si) __B,
5362 (__mmask8) -1);
5363}
5364
eee5d6f5
AI
5365extern __inline __mmask8
5366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5367_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5368{
5369 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5370 (__v4si) __B, 0, __U);
5371}
5372
936c0fe4
AI
5373extern __inline __mmask8
5374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5376{
5377 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5378 (__v4si) __B, __U);
5379}
5380
eee5d6f5
AI
5381extern __inline __mmask8
5382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5384{
5385 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5386 (__v8si) __B, 0,
5387 (__mmask8) -1);
5388}
5389
936c0fe4
AI
5390extern __inline __mmask8
5391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5393{
5394 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5395 (__v8si) __B,
5396 (__mmask8) -1);
5397}
5398
eee5d6f5
AI
5399extern __inline __mmask8
5400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5401_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5402{
5403 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5404 (__v8si) __B, 0, __U);
5405}
5406
936c0fe4
AI
5407extern __inline __mmask8
5408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5409_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5410{
5411 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5412 (__v8si) __B, __U);
5413}
5414
eee5d6f5
AI
5415extern __inline __mmask8
5416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5417_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5418{
5419 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5420 (__v2di) __B, 0,
5421 (__mmask8) -1);
5422}
5423
936c0fe4
AI
5424extern __inline __mmask8
5425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5426_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5427{
5428 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5429 (__v2di) __B,
5430 (__mmask8) -1);
5431}
5432
eee5d6f5
AI
5433extern __inline __mmask8
5434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5436{
5437 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5438 (__v2di) __B, 0, __U);
5439}
5440
936c0fe4
AI
5441extern __inline __mmask8
5442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5444{
5445 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5446 (__v2di) __B, __U);
5447}
5448
eee5d6f5
AI
5449extern __inline __mmask8
5450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5451_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5452{
5453 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5454 (__v4di) __B, 0,
5455 (__mmask8) -1);
5456}
5457
936c0fe4
AI
5458extern __inline __mmask8
5459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5461{
5462 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5463 (__v4di) __B,
5464 (__mmask8) -1);
5465}
5466
eee5d6f5
AI
5467extern __inline __mmask8
5468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5470{
5471 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5472 (__v4di) __B, 0, __U);
5473}
5474
936c0fe4
AI
5475extern __inline __mmask8
5476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5477_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5478{
5479 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5480 (__v4di) __B, __U);
5481}
5482
eee5d6f5
AI
5483extern __inline __mmask8
5484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5485_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5486{
5487 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5488 (__v4si) __B, 6,
5489 (__mmask8) -1);
5490}
5491
936c0fe4
AI
5492extern __inline __mmask8
5493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5494_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5495{
5496 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5497 (__v4si) __B,
5498 (__mmask8) -1);
5499}
5500
eee5d6f5
AI
5501extern __inline __mmask8
5502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5504{
5505 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5506 (__v4si) __B, 6, __U);
5507}
5508
936c0fe4
AI
5509extern __inline __mmask8
5510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5511_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5512{
5513 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5514 (__v4si) __B, __U);
5515}
5516
eee5d6f5
AI
5517extern __inline __mmask8
5518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5519_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5520{
5521 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5522 (__v8si) __B, 6,
5523 (__mmask8) -1);
5524}
5525
936c0fe4
AI
5526extern __inline __mmask8
5527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5528_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5529{
5530 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5531 (__v8si) __B,
5532 (__mmask8) -1);
5533}
5534
eee5d6f5
AI
5535extern __inline __mmask8
5536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5537_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5538{
5539 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5540 (__v8si) __B, 6, __U);
5541}
5542
936c0fe4
AI
5543extern __inline __mmask8
5544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5545_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5546{
5547 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5548 (__v8si) __B, __U);
5549}
5550
eee5d6f5
AI
5551extern __inline __mmask8
5552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5554{
5555 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5556 (__v2di) __B, 6,
5557 (__mmask8) -1);
5558}
5559
936c0fe4
AI
5560extern __inline __mmask8
5561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5562_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5563{
5564 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5565 (__v2di) __B,
5566 (__mmask8) -1);
5567}
5568
eee5d6f5
AI
5569extern __inline __mmask8
5570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5571_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5572{
5573 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5574 (__v2di) __B, 6, __U);
5575}
5576
936c0fe4
AI
5577extern __inline __mmask8
5578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5579_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5580{
5581 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5582 (__v2di) __B, __U);
5583}
5584
eee5d6f5
AI
5585extern __inline __mmask8
5586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5587_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5588{
5589 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5590 (__v4di) __B, 6,
5591 (__mmask8) -1);
5592}
5593
936c0fe4
AI
5594extern __inline __mmask8
5595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5596_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5597{
5598 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5599 (__v4di) __B,
5600 (__mmask8) -1);
5601}
5602
eee5d6f5
AI
5603extern __inline __mmask8
5604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5605_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5606{
5607 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5608 (__v4di) __B, 6, __U);
5609}
5610
936c0fe4
AI
5611extern __inline __mmask8
5612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5614{
5615 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5616 (__v4di) __B, __U);
5617}
5618
5619extern __inline __mmask8
5620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621_mm_test_epi32_mask (__m128i __A, __m128i __B)
5622{
5623 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5624 (__v4si) __B,
5625 (__mmask8) -1);
5626}
5627
5628extern __inline __mmask8
5629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5630_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5631{
5632 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5633 (__v4si) __B, __U);
5634}
5635
5636extern __inline __mmask8
5637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5638_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5639{
5640 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5641 (__v8si) __B,
5642 (__mmask8) -1);
5643}
5644
5645extern __inline __mmask8
5646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5647_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5648{
5649 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5650 (__v8si) __B, __U);
5651}
5652
5653extern __inline __mmask8
5654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5655_mm_test_epi64_mask (__m128i __A, __m128i __B)
5656{
5657 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5658 (__v2di) __B,
5659 (__mmask8) -1);
5660}
5661
5662extern __inline __mmask8
5663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5665{
5666 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5667 (__v2di) __B, __U);
5668}
5669
5670extern __inline __mmask8
5671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5672_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5673{
5674 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5675 (__v4di) __B,
5676 (__mmask8) -1);
5677}
5678
5679extern __inline __mmask8
5680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5681_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5682{
5683 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5684 (__v4di) __B, __U);
5685}
5686
5687extern __inline __mmask8
5688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5689_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5690{
5691 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5692 (__v4si) __B,
5693 (__mmask8) -1);
5694}
5695
5696extern __inline __mmask8
5697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5698_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5699{
5700 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5701 (__v4si) __B, __U);
5702}
5703
5704extern __inline __mmask8
5705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5706_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5707{
5708 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5709 (__v8si) __B,
5710 (__mmask8) -1);
5711}
5712
5713extern __inline __mmask8
5714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5715_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5716{
5717 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5718 (__v8si) __B, __U);
5719}
5720
5721extern __inline __mmask8
5722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5723_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5724{
5725 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5726 (__v2di) __B,
5727 (__mmask8) -1);
5728}
5729
5730extern __inline __mmask8
5731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5732_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5733{
5734 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5735 (__v2di) __B, __U);
5736}
5737
5738extern __inline __mmask8
5739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5740_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5741{
5742 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5743 (__v4di) __B,
5744 (__mmask8) -1);
5745}
5746
5747extern __inline __mmask8
5748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5750{
5751 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5752 (__v4di) __B, __U);
5753}
5754
5755extern __inline __m256d
5756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5757_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5758{
5759 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5760 (__v4df) __W,
5761 (__mmask8) __U);
5762}
5763
5764extern __inline __m256d
5765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5766_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5767{
5768 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5769 (__v4df)
5770 _mm256_setzero_pd (),
5771 (__mmask8) __U);
5772}
5773
5774extern __inline void
5775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5776_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5777{
5778 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5779 (__v4df) __A,
5780 (__mmask8) __U);
5781}
5782
5783extern __inline __m128d
5784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5785_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5786{
5787 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5788 (__v2df) __W,
5789 (__mmask8) __U);
5790}
5791
5792extern __inline __m128d
5793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5794_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5795{
5796 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5797 (__v2df)
5798 _mm_setzero_pd (),
5799 (__mmask8) __U);
5800}
5801
5802extern __inline void
5803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5804_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5805{
5806 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5807 (__v2df) __A,
5808 (__mmask8) __U);
5809}
5810
5811extern __inline __m256
5812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5813_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5814{
5815 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5816 (__v8sf) __W,
5817 (__mmask8) __U);
5818}
5819
5820extern __inline __m256
5821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5822_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5823{
5824 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5825 (__v8sf)
5826 _mm256_setzero_ps (),
5827 (__mmask8) __U);
5828}
5829
5830extern __inline void
5831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5832_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5833{
5834 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5835 (__v8sf) __A,
5836 (__mmask8) __U);
5837}
5838
5839extern __inline __m128
5840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5841_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5842{
5843 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5844 (__v4sf) __W,
5845 (__mmask8) __U);
5846}
5847
5848extern __inline __m128
5849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5851{
5852 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5853 (__v4sf)
5854 _mm_setzero_ps (),
5855 (__mmask8) __U);
5856}
5857
5858extern __inline void
5859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5860_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5861{
5862 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5863 (__v4sf) __A,
5864 (__mmask8) __U);
5865}
5866
5867extern __inline __m256i
5868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5869_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5870{
5871 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5872 (__v4di) __W,
5873 (__mmask8) __U);
5874}
5875
5876extern __inline __m256i
5877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5879{
5880 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5881 (__v4di)
5882 _mm256_setzero_si256 (),
5883 (__mmask8) __U);
5884}
5885
5886extern __inline void
5887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5889{
5890 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5891 (__v4di) __A,
5892 (__mmask8) __U);
5893}
5894
5895extern __inline __m128i
5896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5897_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5898{
5899 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5900 (__v2di) __W,
5901 (__mmask8) __U);
5902}
5903
5904extern __inline __m128i
5905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5906_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5907{
5908 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5909 (__v2di)
a25a7887 5910 _mm_setzero_si128 (),
936c0fe4
AI
5911 (__mmask8) __U);
5912}
5913
5914extern __inline void
5915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5916_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5917{
5918 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5919 (__v2di) __A,
5920 (__mmask8) __U);
5921}
5922
5923extern __inline __m256i
5924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5925_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5926{
5927 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5928 (__v8si) __W,
5929 (__mmask8) __U);
5930}
5931
5932extern __inline __m256i
5933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5934_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5935{
5936 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5937 (__v8si)
5938 _mm256_setzero_si256 (),
5939 (__mmask8) __U);
5940}
5941
5942extern __inline void
5943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5944_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5945{
5946 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5947 (__v8si) __A,
5948 (__mmask8) __U);
5949}
5950
5951extern __inline __m128i
5952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5953_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5954{
5955 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5956 (__v4si) __W,
5957 (__mmask8) __U);
5958}
5959
5960extern __inline __m128i
5961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5962_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5963{
5964 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5965 (__v4si)
5966 _mm_setzero_si128 (),
5967 (__mmask8) __U);
5968}
5969
5970extern __inline void
5971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5972_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5973{
5974 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5975 (__v4si) __A,
5976 (__mmask8) __U);
5977}
5978
5979extern __inline __m256d
5980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5982{
5983 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5984 (__v4df) __W,
5985 (__mmask8) __U);
5986}
5987
5988extern __inline __m256d
5989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5990_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5991{
5992 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5993 (__v4df)
5994 _mm256_setzero_pd (),
5995 (__mmask8) __U);
5996}
5997
5998extern __inline __m256d
5999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6000_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6001{
6002 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6003 (__v4df) __W,
6004 (__mmask8)
6005 __U);
6006}
6007
6008extern __inline __m256d
6009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6011{
6012 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6013 (__v4df)
6014 _mm256_setzero_pd (),
6015 (__mmask8)
6016 __U);
6017}
6018
6019extern __inline __m128d
6020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6021_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6022{
6023 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6024 (__v2df) __W,
6025 (__mmask8) __U);
6026}
6027
6028extern __inline __m128d
6029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6030_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6031{
6032 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6033 (__v2df)
6034 _mm_setzero_pd (),
6035 (__mmask8) __U);
6036}
6037
6038extern __inline __m128d
6039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6040_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6041{
6042 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6043 (__v2df) __W,
6044 (__mmask8)
6045 __U);
6046}
6047
6048extern __inline __m128d
6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6051{
6052 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6053 (__v2df)
6054 _mm_setzero_pd (),
6055 (__mmask8)
6056 __U);
6057}
6058
6059extern __inline __m256
6060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6062{
6063 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6064 (__v8sf) __W,
6065 (__mmask8) __U);
6066}
6067
6068extern __inline __m256
6069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6070_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6071{
6072 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6073 (__v8sf)
6074 _mm256_setzero_ps (),
6075 (__mmask8) __U);
6076}
6077
6078extern __inline __m256
6079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6080_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6081{
6082 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6083 (__v8sf) __W,
6084 (__mmask8) __U);
6085}
6086
6087extern __inline __m256
6088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6090{
6091 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6092 (__v8sf)
6093 _mm256_setzero_ps (),
6094 (__mmask8)
6095 __U);
6096}
6097
6098extern __inline __m128
6099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6100_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6101{
6102 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6103 (__v4sf) __W,
6104 (__mmask8) __U);
6105}
6106
6107extern __inline __m128
6108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6109_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6110{
6111 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6112 (__v4sf)
6113 _mm_setzero_ps (),
6114 (__mmask8) __U);
6115}
6116
6117extern __inline __m128
6118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6120{
6121 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6122 (__v4sf) __W,
6123 (__mmask8) __U);
6124}
6125
6126extern __inline __m128
6127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6128_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6129{
6130 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6131 (__v4sf)
6132 _mm_setzero_ps (),
6133 (__mmask8)
6134 __U);
6135}
6136
6137extern __inline __m256i
6138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6139_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6140{
6141 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6142 (__v4di) __W,
6143 (__mmask8) __U);
6144}
6145
6146extern __inline __m256i
6147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6148_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6149{
6150 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6151 (__v4di)
6152 _mm256_setzero_si256 (),
6153 (__mmask8) __U);
6154}
6155
6156extern __inline __m256i
6157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6159 void const *__P)
6160{
6161 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6162 (__v4di) __W,
6163 (__mmask8)
6164 __U);
6165}
6166
6167extern __inline __m256i
6168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6170{
6171 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6172 (__v4di)
6173 _mm256_setzero_si256 (),
6174 (__mmask8)
6175 __U);
6176}
6177
6178extern __inline __m128i
6179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6180_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6181{
6182 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6183 (__v2di) __W,
6184 (__mmask8) __U);
6185}
6186
6187extern __inline __m128i
6188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6189_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6190{
6191 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6192 (__v2di)
6193 _mm_setzero_si128 (),
6194 (__mmask8) __U);
6195}
6196
6197extern __inline __m128i
6198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6199_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6200{
6201 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6202 (__v2di) __W,
6203 (__mmask8)
6204 __U);
6205}
6206
6207extern __inline __m128i
6208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6210{
6211 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6212 (__v2di)
6213 _mm_setzero_si128 (),
6214 (__mmask8)
6215 __U);
6216}
6217
6218extern __inline __m256i
6219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6220_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6221{
6222 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6223 (__v8si) __W,
6224 (__mmask8) __U);
6225}
6226
6227extern __inline __m256i
6228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6230{
6231 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6232 (__v8si)
6233 _mm256_setzero_si256 (),
6234 (__mmask8) __U);
6235}
6236
6237extern __inline __m256i
6238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6239_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6240 void const *__P)
6241{
6242 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6243 (__v8si) __W,
6244 (__mmask8)
6245 __U);
6246}
6247
6248extern __inline __m256i
6249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6251{
6252 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6253 (__v8si)
6254 _mm256_setzero_si256 (),
6255 (__mmask8)
6256 __U);
6257}
6258
6259extern __inline __m128i
6260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6261_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6262{
6263 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6264 (__v4si) __W,
6265 (__mmask8) __U);
6266}
6267
6268extern __inline __m128i
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6271{
6272 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6273 (__v4si)
6274 _mm_setzero_si128 (),
6275 (__mmask8) __U);
6276}
6277
6278extern __inline __m128i
6279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6280_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6281{
6282 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6283 (__v4si) __W,
6284 (__mmask8)
6285 __U);
6286}
6287
6288extern __inline __m128i
6289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6291{
6292 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6293 (__v4si)
6294 _mm_setzero_si128 (),
6295 (__mmask8)
6296 __U);
6297}
6298
6299extern __inline __m256d
6300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6302{
6303 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6304 /* idx */ ,
6305 (__v4df) __A,
6306 (__v4df) __B,
c42b0bdf 6307 (__mmask8) -1);
936c0fe4
AI
6308}
6309
6310extern __inline __m256d
6311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6313 __m256d __B)
6314{
6315 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6316 /* idx */ ,
6317 (__v4df) __A,
6318 (__v4df) __B,
6319 (__mmask8)
6320 __U);
6321}
6322
6323extern __inline __m256d
6324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6326 __m256d __B)
6327{
6328 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6329 (__v4di) __I
6330 /* idx */ ,
6331 (__v4df) __B,
6332 (__mmask8)
6333 __U);
6334}
6335
6336extern __inline __m256d
6337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6339 __m256d __B)
6340{
6341 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6342 /* idx */ ,
6343 (__v4df) __A,
6344 (__v4df) __B,
6345 (__mmask8)
6346 __U);
6347}
6348
6349extern __inline __m256
6350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6351_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6352{
6353 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6354 /* idx */ ,
6355 (__v8sf) __A,
6356 (__v8sf) __B,
6357 (__mmask8) -1);
6358}
6359
6360extern __inline __m256
6361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6363 __m256 __B)
6364{
6365 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6366 /* idx */ ,
6367 (__v8sf) __A,
6368 (__v8sf) __B,
6369 (__mmask8) __U);
6370}
6371
6372extern __inline __m256
6373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6375 __m256 __B)
6376{
6377 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6378 (__v8si) __I
6379 /* idx */ ,
6380 (__v8sf) __B,
6381 (__mmask8) __U);
6382}
6383
6384extern __inline __m256
6385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6387 __m256 __B)
6388{
6389 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6390 /* idx */ ,
6391 (__v8sf) __A,
6392 (__v8sf) __B,
6393 (__mmask8)
6394 __U);
6395}
6396
6397extern __inline __m128i
6398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6400{
6401 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6402 /* idx */ ,
6403 (__v2di) __A,
6404 (__v2di) __B,
6405 (__mmask8) -1);
6406}
6407
6408extern __inline __m128i
6409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6411 __m128i __B)
6412{
6413 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6414 /* idx */ ,
6415 (__v2di) __A,
6416 (__v2di) __B,
6417 (__mmask8) __U);
6418}
6419
6420extern __inline __m128i
6421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6422_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6423 __m128i __B)
6424{
6425 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6426 (__v2di) __I
6427 /* idx */ ,
6428 (__v2di) __B,
6429 (__mmask8) __U);
6430}
6431
6432extern __inline __m128i
6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6435 __m128i __B)
6436{
6437 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6438 /* idx */ ,
6439 (__v2di) __A,
6440 (__v2di) __B,
6441 (__mmask8)
6442 __U);
6443}
6444
6445extern __inline __m128i
6446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6447_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6448{
6449 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6450 /* idx */ ,
6451 (__v4si) __A,
6452 (__v4si) __B,
6453 (__mmask8) -1);
6454}
6455
6456extern __inline __m128i
6457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6459 __m128i __B)
6460{
6461 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6462 /* idx */ ,
6463 (__v4si) __A,
6464 (__v4si) __B,
6465 (__mmask8) __U);
6466}
6467
6468extern __inline __m128i
6469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6470_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6471 __m128i __B)
6472{
6473 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6474 (__v4si) __I
6475 /* idx */ ,
6476 (__v4si) __B,
6477 (__mmask8) __U);
6478}
6479
6480extern __inline __m128i
6481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6483 __m128i __B)
6484{
6485 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6486 /* idx */ ,
6487 (__v4si) __A,
6488 (__v4si) __B,
6489 (__mmask8)
6490 __U);
6491}
6492
6493extern __inline __m256i
6494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6495_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6496{
6497 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6498 /* idx */ ,
6499 (__v4di) __A,
6500 (__v4di) __B,
6501 (__mmask8) -1);
6502}
6503
6504extern __inline __m256i
6505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6507 __m256i __B)
6508{
6509 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6510 /* idx */ ,
6511 (__v4di) __A,
6512 (__v4di) __B,
6513 (__mmask8) __U);
6514}
6515
6516extern __inline __m256i
6517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6518_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6519 __mmask8 __U, __m256i __B)
6520{
6521 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6522 (__v4di) __I
6523 /* idx */ ,
6524 (__v4di) __B,
6525 (__mmask8) __U);
6526}
6527
6528extern __inline __m256i
6529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6531 __m256i __I, __m256i __B)
6532{
6533 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6534 /* idx */ ,
6535 (__v4di) __A,
6536 (__v4di) __B,
6537 (__mmask8)
6538 __U);
6539}
6540
6541extern __inline __m256i
6542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6543_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6544{
6545 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6546 /* idx */ ,
6547 (__v8si) __A,
6548 (__v8si) __B,
6549 (__mmask8) -1);
6550}
6551
6552extern __inline __m256i
6553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6555 __m256i __B)
6556{
6557 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6558 /* idx */ ,
6559 (__v8si) __A,
6560 (__v8si) __B,
6561 (__mmask8) __U);
6562}
6563
6564extern __inline __m256i
6565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6566_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6567 __mmask8 __U, __m256i __B)
6568{
6569 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6570 (__v8si) __I
6571 /* idx */ ,
6572 (__v8si) __B,
6573 (__mmask8) __U);
6574}
6575
6576extern __inline __m256i
6577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6579 __m256i __I, __m256i __B)
6580{
6581 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6582 /* idx */ ,
6583 (__v8si) __A,
6584 (__v8si) __B,
6585 (__mmask8)
6586 __U);
6587}
6588
6589extern __inline __m128d
6590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6591_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6592{
6593 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6594 /* idx */ ,
6595 (__v2df) __A,
6596 (__v2df) __B,
c42b0bdf 6597 (__mmask8) -1);
936c0fe4
AI
6598}
6599
6600extern __inline __m128d
6601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6603 __m128d __B)
6604{
6605 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6606 /* idx */ ,
6607 (__v2df) __A,
6608 (__v2df) __B,
6609 (__mmask8)
6610 __U);
6611}
6612
6613extern __inline __m128d
6614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6616 __m128d __B)
6617{
6618 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6619 (__v2di) __I
6620 /* idx */ ,
6621 (__v2df) __B,
6622 (__mmask8)
6623 __U);
6624}
6625
6626extern __inline __m128d
6627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6628_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6629 __m128d __B)
6630{
6631 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6632 /* idx */ ,
6633 (__v2df) __A,
6634 (__v2df) __B,
6635 (__mmask8)
6636 __U);
6637}
6638
6639extern __inline __m128
6640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6641_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6642{
6643 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6644 /* idx */ ,
6645 (__v4sf) __A,
6646 (__v4sf) __B,
6647 (__mmask8) -1);
6648}
6649
6650extern __inline __m128
6651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6653 __m128 __B)
6654{
6655 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6656 /* idx */ ,
6657 (__v4sf) __A,
6658 (__v4sf) __B,
6659 (__mmask8) __U);
6660}
6661
6662extern __inline __m128
6663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6664_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6665 __m128 __B)
6666{
6667 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6668 (__v4si) __I
6669 /* idx */ ,
6670 (__v4sf) __B,
6671 (__mmask8) __U);
6672}
6673
6674extern __inline __m128
6675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6677 __m128 __B)
6678{
6679 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6680 /* idx */ ,
6681 (__v4sf) __A,
6682 (__v4sf) __B,
6683 (__mmask8)
6684 __U);
6685}
6686
6687extern __inline __m128i
6688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6689_mm_srav_epi64 (__m128i __X, __m128i __Y)
6690{
6691 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6692 (__v2di) __Y,
6693 (__v2di)
a25a7887 6694 _mm_setzero_si128 (),
936c0fe4
AI
6695 (__mmask8) -1);
6696}
6697
6698extern __inline __m128i
6699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6700_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6701 __m128i __Y)
6702{
6703 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6704 (__v2di) __Y,
6705 (__v2di) __W,
6706 (__mmask8) __U);
6707}
6708
6709extern __inline __m128i
6710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6712{
6713 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6714 (__v2di) __Y,
6715 (__v2di)
a25a7887 6716 _mm_setzero_si128 (),
936c0fe4
AI
6717 (__mmask8) __U);
6718}
6719
6720extern __inline __m256i
6721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6722_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6723 __m256i __Y)
6724{
6725 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6726 (__v8si) __Y,
6727 (__v8si) __W,
6728 (__mmask8) __U);
6729}
6730
6731extern __inline __m256i
6732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6734{
6735 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6736 (__v8si) __Y,
6737 (__v8si)
6738 _mm256_setzero_si256 (),
6739 (__mmask8) __U);
6740}
6741
6742extern __inline __m128i
6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6745 __m128i __Y)
6746{
6747 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6748 (__v4si) __Y,
6749 (__v4si) __W,
6750 (__mmask8) __U);
6751}
6752
6753extern __inline __m128i
6754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6755_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6756{
6757 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6758 (__v4si) __Y,
6759 (__v4si)
6760 _mm_setzero_si128 (),
6761 (__mmask8) __U);
6762}
6763
6764extern __inline __m256i
6765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6767 __m256i __Y)
6768{
6769 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6770 (__v4di) __Y,
6771 (__v4di) __W,
6772 (__mmask8) __U);
6773}
6774
6775extern __inline __m256i
6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6778{
6779 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6780 (__v4di) __Y,
6781 (__v4di)
6782 _mm256_setzero_si256 (),
6783 (__mmask8) __U);
6784}
6785
6786extern __inline __m128i
6787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6789 __m128i __Y)
6790{
6791 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6792 (__v2di) __Y,
6793 (__v2di) __W,
6794 (__mmask8) __U);
6795}
6796
6797extern __inline __m128i
6798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6800{
6801 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6802 (__v2di) __Y,
6803 (__v2di)
a25a7887 6804 _mm_setzero_si128 (),
936c0fe4
AI
6805 (__mmask8) __U);
6806}
6807
6808extern __inline __m256i
6809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6810_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6811 __m256i __Y)
6812{
6813 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6814 (__v8si) __Y,
6815 (__v8si) __W,
6816 (__mmask8) __U);
6817}
6818
6819extern __inline __m256i
6820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6821_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6822{
6823 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6824 (__v8si) __Y,
6825 (__v8si)
6826 _mm256_setzero_si256 (),
6827 (__mmask8) __U);
6828}
6829
6830extern __inline __m128i
6831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6832_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6833 __m128i __Y)
6834{
6835 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6836 (__v4si) __Y,
6837 (__v4si) __W,
6838 (__mmask8) __U);
6839}
6840
6841extern __inline __m128i
6842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6843_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6844{
6845 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6846 (__v4si) __Y,
6847 (__v4si)
6848 _mm_setzero_si128 (),
6849 (__mmask8) __U);
6850}
6851
6852extern __inline __m256i
6853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6854_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6855 __m256i __Y)
6856{
6857 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6858 (__v8si) __Y,
6859 (__v8si) __W,
6860 (__mmask8) __U);
6861}
6862
6863extern __inline __m256i
6864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6865_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6866{
6867 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6868 (__v8si) __Y,
6869 (__v8si)
6870 _mm256_setzero_si256 (),
6871 (__mmask8) __U);
6872}
6873
6874extern __inline __m128i
6875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6876_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6877 __m128i __Y)
6878{
6879 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6880 (__v4si) __Y,
6881 (__v4si) __W,
6882 (__mmask8) __U);
6883}
6884
6885extern __inline __m128i
6886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6887_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6888{
6889 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6890 (__v4si) __Y,
6891 (__v4si)
6892 _mm_setzero_si128 (),
6893 (__mmask8) __U);
6894}
6895
6896extern __inline __m256i
6897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6898_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6899 __m256i __Y)
6900{
6901 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6902 (__v4di) __Y,
6903 (__v4di) __W,
6904 (__mmask8) __U);
6905}
6906
6907extern __inline __m256i
6908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6910{
6911 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6912 (__v4di) __Y,
6913 (__v4di)
6914 _mm256_setzero_si256 (),
6915 (__mmask8) __U);
6916}
6917
6918extern __inline __m128i
6919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6920_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6921 __m128i __Y)
6922{
6923 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6924 (__v2di) __Y,
6925 (__v2di) __W,
6926 (__mmask8) __U);
6927}
6928
6929extern __inline __m128i
6930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6931_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6932{
6933 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6934 (__v2di) __Y,
6935 (__v2di)
a25a7887 6936 _mm_setzero_si128 (),
936c0fe4
AI
6937 (__mmask8) __U);
6938}
6939
6940extern __inline __m256i
6941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6942_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6943{
6944 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6945 (__v8si) __B,
6946 (__v8si)
6947 _mm256_setzero_si256 (),
6948 (__mmask8) -1);
6949}
6950
6951extern __inline __m256i
6952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6953_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6954 __m256i __B)
6955{
6956 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6957 (__v8si) __B,
6958 (__v8si) __W,
6959 (__mmask8) __U);
6960}
6961
6962extern __inline __m256i
6963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6964_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6965{
6966 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6967 (__v8si) __B,
6968 (__v8si)
6969 _mm256_setzero_si256 (),
6970 (__mmask8) __U);
6971}
6972
6973extern __inline __m128i
6974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6975_mm_rolv_epi32 (__m128i __A, __m128i __B)
6976{
6977 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6978 (__v4si) __B,
6979 (__v4si)
6980 _mm_setzero_si128 (),
6981 (__mmask8) -1);
6982}
6983
6984extern __inline __m128i
6985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6986_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6987 __m128i __B)
6988{
6989 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6990 (__v4si) __B,
6991 (__v4si) __W,
6992 (__mmask8) __U);
6993}
6994
6995extern __inline __m128i
6996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6997_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6998{
6999 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7000 (__v4si) __B,
7001 (__v4si)
7002 _mm_setzero_si128 (),
7003 (__mmask8) __U);
7004}
7005
7006extern __inline __m256i
7007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7008_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7009{
7010 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7011 (__v8si) __B,
7012 (__v8si)
7013 _mm256_setzero_si256 (),
7014 (__mmask8) -1);
7015}
7016
7017extern __inline __m256i
7018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7019_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7020 __m256i __B)
7021{
7022 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7023 (__v8si) __B,
7024 (__v8si) __W,
7025 (__mmask8) __U);
7026}
7027
7028extern __inline __m256i
7029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7030_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7031{
7032 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7033 (__v8si) __B,
7034 (__v8si)
7035 _mm256_setzero_si256 (),
7036 (__mmask8) __U);
7037}
7038
7039extern __inline __m128i
7040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041_mm_rorv_epi32 (__m128i __A, __m128i __B)
7042{
7043 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7044 (__v4si) __B,
7045 (__v4si)
7046 _mm_setzero_si128 (),
7047 (__mmask8) -1);
7048}
7049
7050extern __inline __m128i
7051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7052_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7053 __m128i __B)
7054{
7055 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7056 (__v4si) __B,
7057 (__v4si) __W,
7058 (__mmask8) __U);
7059}
7060
7061extern __inline __m128i
7062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7063_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7064{
7065 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7066 (__v4si) __B,
7067 (__v4si)
7068 _mm_setzero_si128 (),
7069 (__mmask8) __U);
7070}
7071
7072extern __inline __m256i
7073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7074_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7075{
7076 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7077 (__v4di) __B,
7078 (__v4di)
7079 _mm256_setzero_si256 (),
7080 (__mmask8) -1);
7081}
7082
7083extern __inline __m256i
7084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7085_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7086 __m256i __B)
7087{
7088 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7089 (__v4di) __B,
7090 (__v4di) __W,
7091 (__mmask8) __U);
7092}
7093
7094extern __inline __m256i
7095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7097{
7098 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7099 (__v4di) __B,
7100 (__v4di)
7101 _mm256_setzero_si256 (),
7102 (__mmask8) __U);
7103}
7104
7105extern __inline __m128i
7106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7107_mm_rolv_epi64 (__m128i __A, __m128i __B)
7108{
7109 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7110 (__v2di) __B,
7111 (__v2di)
a25a7887 7112 _mm_setzero_si128 (),
936c0fe4
AI
7113 (__mmask8) -1);
7114}
7115
7116extern __inline __m128i
7117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7118_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7119 __m128i __B)
7120{
7121 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7122 (__v2di) __B,
7123 (__v2di) __W,
7124 (__mmask8) __U);
7125}
7126
7127extern __inline __m128i
7128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7129_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7130{
7131 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7132 (__v2di) __B,
7133 (__v2di)
a25a7887 7134 _mm_setzero_si128 (),
936c0fe4
AI
7135 (__mmask8) __U);
7136}
7137
7138extern __inline __m256i
7139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7140_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7141{
7142 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7143 (__v4di) __B,
7144 (__v4di)
7145 _mm256_setzero_si256 (),
7146 (__mmask8) -1);
7147}
7148
7149extern __inline __m256i
7150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7151_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7152 __m256i __B)
7153{
7154 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7155 (__v4di) __B,
7156 (__v4di) __W,
7157 (__mmask8) __U);
7158}
7159
7160extern __inline __m256i
7161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7162_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7163{
7164 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7165 (__v4di) __B,
7166 (__v4di)
7167 _mm256_setzero_si256 (),
7168 (__mmask8) __U);
7169}
7170
7171extern __inline __m128i
7172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7173_mm_rorv_epi64 (__m128i __A, __m128i __B)
7174{
7175 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7176 (__v2di) __B,
7177 (__v2di)
a25a7887 7178 _mm_setzero_si128 (),
936c0fe4
AI
7179 (__mmask8) -1);
7180}
7181
7182extern __inline __m128i
7183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7184_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7185 __m128i __B)
7186{
7187 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7188 (__v2di) __B,
7189 (__v2di) __W,
7190 (__mmask8) __U);
7191}
7192
7193extern __inline __m128i
7194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7195_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7196{
7197 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7198 (__v2di) __B,
7199 (__v2di)
a25a7887 7200 _mm_setzero_si128 (),
936c0fe4
AI
7201 (__mmask8) __U);
7202}
7203
7204extern __inline __m256i
7205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7206_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7207{
7208 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7209 (__v4di) __Y,
7210 (__v4di)
7211 _mm256_setzero_si256 (),
7212 (__mmask8) -1);
7213}
7214
7215extern __inline __m256i
7216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7217_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7218 __m256i __Y)
7219{
7220 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7221 (__v4di) __Y,
7222 (__v4di) __W,
7223 (__mmask8) __U);
7224}
7225
7226extern __inline __m256i
7227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7229{
7230 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7231 (__v4di) __Y,
7232 (__v4di)
7233 _mm256_setzero_si256 (),
7234 (__mmask8) __U);
7235}
7236
7237extern __inline __m256i
7238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7240 __m256i __B)
7241{
7242 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7243 (__v4di) __B,
7244 (__v4di) __W, __U);
7245}
7246
7247extern __inline __m256i
7248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7249_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7250{
7251 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7252 (__v4di) __B,
7253 (__v4di)
7254 _mm256_setzero_pd (),
7255 __U);
7256}
7257
7258extern __inline __m128i
7259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7260_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7261 __m128i __B)
7262{
7263 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7264 (__v2di) __B,
7265 (__v2di) __W, __U);
7266}
7267
7268extern __inline __m128i
7269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7270_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7271{
7272 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7273 (__v2di) __B,
7274 (__v2di)
7275 _mm_setzero_pd (),
7276 __U);
7277}
7278
7279extern __inline __m256i
7280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7281_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7282 __m256i __B)
7283{
7284 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7285 (__v4di) __B,
7286 (__v4di) __W, __U);
7287}
7288
7289extern __inline __m256i
7290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7292{
7293 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7294 (__v4di) __B,
7295 (__v4di)
7296 _mm256_setzero_pd (),
7297 __U);
7298}
7299
7300extern __inline __m128i
7301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7303 __m128i __B)
7304{
7305 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7306 (__v2di) __B,
7307 (__v2di) __W, __U);
7308}
7309
7310extern __inline __m128i
7311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7312_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7313{
7314 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7315 (__v2di) __B,
7316 (__v2di)
7317 _mm_setzero_pd (),
7318 __U);
7319}
7320
7321extern __inline __m256i
7322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7324 __m256i __B)
7325{
7326 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7327 (__v4di) __B,
7328 (__v4di) __W,
7329 (__mmask8) __U);
7330}
7331
7332extern __inline __m256i
7333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7335{
7336 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7337 (__v4di) __B,
7338 (__v4di)
7339 _mm256_setzero_si256 (),
7340 (__mmask8) __U);
7341}
7342
7343extern __inline __m128i
7344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7346{
7347 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7348 (__v2di) __B,
7349 (__v2di) __W,
7350 (__mmask8) __U);
7351}
7352
7353extern __inline __m128i
7354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7355_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7356{
7357 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7358 (__v2di) __B,
7359 (__v2di)
7360 _mm_setzero_si128 (),
7361 (__mmask8) __U);
7362}
7363
7364extern __inline __m256i
7365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7366_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7367 __m256i __B)
7368{
7369 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7370 (__v4di) __B,
7371 (__v4di) __W,
7372 (__mmask8) __U);
7373}
7374
7375extern __inline __m256i
7376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7377_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7378{
7379 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7380 (__v4di) __B,
7381 (__v4di)
7382 _mm256_setzero_si256 (),
7383 (__mmask8) __U);
7384}
7385
7386extern __inline __m128i
7387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7388_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7389 __m128i __B)
7390{
7391 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7392 (__v2di) __B,
7393 (__v2di) __W,
7394 (__mmask8) __U);
7395}
7396
7397extern __inline __m128i
7398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7399_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7400{
7401 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7402 (__v2di) __B,
7403 (__v2di)
7404 _mm_setzero_si128 (),
7405 (__mmask8) __U);
7406}
7407
7408extern __inline __m256d
7409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7410_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7411 __m256d __B)
7412{
7413 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7414 (__v4df) __B,
7415 (__v4df) __W,
7416 (__mmask8) __U);
7417}
7418
7419extern __inline __m256d
7420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7421_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7422{
7423 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7424 (__v4df) __B,
7425 (__v4df)
7426 _mm256_setzero_pd (),
7427 (__mmask8) __U);
7428}
7429
7430extern __inline __m256
7431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7432_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7433{
7434 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7435 (__v8sf) __B,
7436 (__v8sf) __W,
7437 (__mmask8) __U);
7438}
7439
7440extern __inline __m256
7441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7442_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7443{
7444 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7445 (__v8sf) __B,
7446 (__v8sf)
7447 _mm256_setzero_ps (),
7448 (__mmask8) __U);
7449}
7450
7451extern __inline __m128
7452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7453_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7454{
7455 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7456 (__v4sf) __B,
7457 (__v4sf) __W,
7458 (__mmask8) __U);
7459}
7460
7461extern __inline __m128
7462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7463_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7464{
7465 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7466 (__v4sf) __B,
7467 (__v4sf)
7468 _mm_setzero_ps (),
7469 (__mmask8) __U);
7470}
7471
7472extern __inline __m128d
7473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7474_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7475{
7476 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7477 (__v2df) __B,
7478 (__v2df) __W,
7479 (__mmask8) __U);
7480}
7481
7482extern __inline __m128d
7483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7484_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7485{
7486 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7487 (__v2df) __B,
7488 (__v2df)
7489 _mm_setzero_pd (),
7490 (__mmask8) __U);
7491}
7492
7493extern __inline __m256d
7494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7495_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7496 __m256d __B)
7497{
7498 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7499 (__v4df) __B,
7500 (__v4df) __W,
7501 (__mmask8) __U);
7502}
7503
7504extern __inline __m256d
7505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7506_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7507 __m256d __B)
7508{
7509 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7510 (__v4df) __B,
7511 (__v4df) __W,
7512 (__mmask8) __U);
7513}
7514
7515extern __inline __m256d
7516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7517_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7518{
7519 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7520 (__v4df) __B,
7521 (__v4df)
7522 _mm256_setzero_pd (),
7523 (__mmask8) __U);
7524}
7525
7526extern __inline __m256
7527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7528_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7529{
7530 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7531 (__v8sf) __B,
7532 (__v8sf) __W,
7533 (__mmask8) __U);
7534}
7535
7536extern __inline __m256d
7537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7538_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7539{
7540 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7541 (__v4df) __B,
7542 (__v4df)
7543 _mm256_setzero_pd (),
7544 (__mmask8) __U);
7545}
7546
7547extern __inline __m256
7548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7549_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7550{
7551 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7552 (__v8sf) __B,
7553 (__v8sf) __W,
7554 (__mmask8) __U);
7555}
7556
7557extern __inline __m256
7558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7559_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7560{
7561 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7562 (__v8sf) __B,
7563 (__v8sf)
7564 _mm256_setzero_ps (),
7565 (__mmask8) __U);
7566}
7567
7568extern __inline __m256
7569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7570_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7571{
7572 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7573 (__v8sf) __B,
7574 (__v8sf)
7575 _mm256_setzero_ps (),
7576 (__mmask8) __U);
7577}
7578
7579extern __inline __m128
7580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7581_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7582{
7583 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7584 (__v4sf) __B,
7585 (__v4sf) __W,
7586 (__mmask8) __U);
7587}
7588
7589extern __inline __m128
7590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7591_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7592{
7593 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7594 (__v4sf) __B,
7595 (__v4sf) __W,
7596 (__mmask8) __U);
7597}
7598
7599extern __inline __m128
7600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7601_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7602{
7603 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7604 (__v4sf) __B,
7605 (__v4sf)
7606 _mm_setzero_ps (),
7607 (__mmask8) __U);
7608}
7609
7610extern __inline __m128
7611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7612_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7613{
7614 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7615 (__v4sf) __B,
7616 (__v4sf)
7617 _mm_setzero_ps (),
7618 (__mmask8) __U);
7619}
7620
7621extern __inline __m128
7622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7624{
7625 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7626 (__v4sf) __B,
7627 (__v4sf) __W,
7628 (__mmask8) __U);
7629}
7630
7631extern __inline __m128
7632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7633_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7634{
7635 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7636 (__v4sf) __B,
7637 (__v4sf)
7638 _mm_setzero_ps (),
7639 (__mmask8) __U);
7640}
7641
7642extern __inline __m128d
7643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7644_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7645{
7646 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7647 (__v2df) __B,
7648 (__v2df) __W,
7649 (__mmask8) __U);
7650}
7651
7652extern __inline __m128d
7653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7654_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7655{
7656 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7657 (__v2df) __B,
7658 (__v2df)
7659 _mm_setzero_pd (),
7660 (__mmask8) __U);
7661}
7662
7663extern __inline __m128d
7664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7666{
7667 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7668 (__v2df) __B,
7669 (__v2df) __W,
7670 (__mmask8) __U);
7671}
7672
7673extern __inline __m128d
7674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7675_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7676{
7677 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7678 (__v2df) __B,
7679 (__v2df)
7680 _mm_setzero_pd (),
7681 (__mmask8) __U);
7682}
7683
7684extern __inline __m128d
7685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7687{
7688 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7689 (__v2df) __B,
7690 (__v2df) __W,
7691 (__mmask8) __U);
7692}
7693
7694extern __inline __m128d
7695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7696_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7697{
7698 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7699 (__v2df) __B,
7700 (__v2df)
7701 _mm_setzero_pd (),
7702 (__mmask8) __U);
7703}
7704
7705extern __inline __m256
7706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7707_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7708{
7709 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7710 (__v8sf) __B,
7711 (__v8sf) __W,
7712 (__mmask8) __U);
7713}
7714
7715extern __inline __m256
7716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7717_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7718{
7719 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7720 (__v8sf) __B,
7721 (__v8sf)
7722 _mm256_setzero_ps (),
7723 (__mmask8) __U);
7724}
7725
7726extern __inline __m256d
7727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7728_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7729 __m256d __B)
7730{
7731 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7732 (__v4df) __B,
7733 (__v4df) __W,
7734 (__mmask8) __U);
7735}
7736
7737extern __inline __m256d
7738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7739_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7740{
7741 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7742 (__v4df) __B,
7743 (__v4df)
7744 _mm256_setzero_pd (),
7745 (__mmask8) __U);
7746}
7747
7748extern __inline __m256i
7749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7750_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7751{
7752 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7753 (__v4di) __B,
7754 (__v4di)
7755 _mm256_setzero_si256 (),
7756 __M);
7757}
7758
7759extern __inline __m256i
7760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7761_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7762 __m256i __B)
7763{
7764 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7765 (__v4di) __B,
7766 (__v4di) __W, __M);
7767}
7768
7769extern __inline __m256i
7770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7771_mm256_min_epi64 (__m256i __A, __m256i __B)
7772{
7773 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7774 (__v4di) __B,
7775 (__v4di)
7776 _mm256_setzero_si256 (),
7777 (__mmask8) -1);
7778}
7779
7780extern __inline __m256i
7781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7782_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7783 __m256i __B)
7784{
7785 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7786 (__v4di) __B,
7787 (__v4di) __W, __M);
7788}
7789
7790extern __inline __m256i
7791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7793{
7794 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7795 (__v4di) __B,
7796 (__v4di)
7797 _mm256_setzero_si256 (),
7798 __M);
7799}
7800
7801extern __inline __m256i
7802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7803_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7804{
7805 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7806 (__v4di) __B,
7807 (__v4di)
7808 _mm256_setzero_si256 (),
7809 __M);
7810}
7811
7812extern __inline __m256i
7813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7814_mm256_max_epi64 (__m256i __A, __m256i __B)
7815{
7816 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7817 (__v4di) __B,
7818 (__v4di)
7819 _mm256_setzero_si256 (),
7820 (__mmask8) -1);
7821}
7822
7823extern __inline __m256i
7824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7825_mm256_max_epu64 (__m256i __A, __m256i __B)
7826{
7827 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7828 (__v4di) __B,
7829 (__v4di)
7830 _mm256_setzero_si256 (),
7831 (__mmask8) -1);
7832}
7833
7834extern __inline __m256i
7835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7836_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7837 __m256i __B)
7838{
7839 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7840 (__v4di) __B,
7841 (__v4di) __W, __M);
7842}
7843
7844extern __inline __m256i
7845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7846_mm256_min_epu64 (__m256i __A, __m256i __B)
7847{
7848 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7849 (__v4di) __B,
7850 (__v4di)
7851 _mm256_setzero_si256 (),
7852 (__mmask8) -1);
7853}
7854
7855extern __inline __m256i
7856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7857_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7858 __m256i __B)
7859{
7860 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7861 (__v4di) __B,
7862 (__v4di) __W, __M);
7863}
7864
7865extern __inline __m256i
7866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7867_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7868{
7869 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7870 (__v4di) __B,
7871 (__v4di)
7872 _mm256_setzero_si256 (),
7873 __M);
7874}
7875
7876extern __inline __m256i
7877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7878_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7879{
7880 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7881 (__v8si) __B,
7882 (__v8si)
7883 _mm256_setzero_si256 (),
7884 __M);
7885}
7886
7887extern __inline __m256i
7888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7889_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7890 __m256i __B)
7891{
7892 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7893 (__v8si) __B,
7894 (__v8si) __W, __M);
7895}
7896
7897extern __inline __m256i
7898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7899_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7900{
7901 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7902 (__v8si) __B,
7903 (__v8si)
7904 _mm256_setzero_si256 (),
7905 __M);
7906}
7907
7908extern __inline __m256i
7909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7910_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7911 __m256i __B)
7912{
7913 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7914 (__v8si) __B,
7915 (__v8si) __W, __M);
7916}
7917
7918extern __inline __m256i
7919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7920_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7921{
7922 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7923 (__v8si) __B,
7924 (__v8si)
7925 _mm256_setzero_si256 (),
7926 __M);
7927}
7928
7929extern __inline __m256i
7930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7931_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7932 __m256i __B)
7933{
7934 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7935 (__v8si) __B,
7936 (__v8si) __W, __M);
7937}
7938
7939extern __inline __m256i
7940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7941_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7942{
7943 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7944 (__v8si) __B,
7945 (__v8si)
7946 _mm256_setzero_si256 (),
7947 __M);
7948}
7949
7950extern __inline __m256i
7951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7952_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7953 __m256i __B)
7954{
7955 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7956 (__v8si) __B,
7957 (__v8si) __W, __M);
7958}
7959
7960extern __inline __m128i
7961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7962_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7963{
7964 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7965 (__v2di) __B,
7966 (__v2di)
7967 _mm_setzero_si128 (),
7968 __M);
7969}
7970
7971extern __inline __m128i
7972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7973_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7974 __m128i __B)
7975{
7976 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7977 (__v2di) __B,
7978 (__v2di) __W, __M);
7979}
7980
7981extern __inline __m128i
7982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7983_mm_min_epi64 (__m128i __A, __m128i __B)
7984{
7985 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7986 (__v2di) __B,
7987 (__v2di)
a25a7887 7988 _mm_setzero_si128 (),
936c0fe4
AI
7989 (__mmask8) -1);
7990}
7991
7992extern __inline __m128i
7993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7994_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7995 __m128i __B)
7996{
7997 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7998 (__v2di) __B,
7999 (__v2di) __W, __M);
8000}
8001
8002extern __inline __m128i
8003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8004_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8005{
8006 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8007 (__v2di) __B,
8008 (__v2di)
8009 _mm_setzero_si128 (),
8010 __M);
8011}
8012
8013extern __inline __m128i
8014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8015_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8016{
8017 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8018 (__v2di) __B,
8019 (__v2di)
8020 _mm_setzero_si128 (),
8021 __M);
8022}
8023
8024extern __inline __m128i
8025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026_mm_max_epi64 (__m128i __A, __m128i __B)
8027{
8028 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8029 (__v2di) __B,
8030 (__v2di)
a25a7887 8031 _mm_setzero_si128 (),
936c0fe4
AI
8032 (__mmask8) -1);
8033}
8034
8035extern __inline __m128i
8036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8037_mm_max_epu64 (__m128i __A, __m128i __B)
8038{
8039 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8040 (__v2di) __B,
8041 (__v2di)
a25a7887 8042 _mm_setzero_si128 (),
936c0fe4
AI
8043 (__mmask8) -1);
8044}
8045
8046extern __inline __m128i
8047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8048_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8049 __m128i __B)
8050{
8051 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8052 (__v2di) __B,
8053 (__v2di) __W, __M);
8054}
8055
8056extern __inline __m128i
8057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8058_mm_min_epu64 (__m128i __A, __m128i __B)
8059{
8060 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8061 (__v2di) __B,
8062 (__v2di)
a25a7887 8063 _mm_setzero_si128 (),
936c0fe4
AI
8064 (__mmask8) -1);
8065}
8066
8067extern __inline __m128i
8068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8069_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8070 __m128i __B)
8071{
8072 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8073 (__v2di) __B,
8074 (__v2di) __W, __M);
8075}
8076
8077extern __inline __m128i
8078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8079_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8080{
8081 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8082 (__v2di) __B,
8083 (__v2di)
8084 _mm_setzero_si128 (),
8085 __M);
8086}
8087
8088extern __inline __m128i
8089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8090_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8091{
8092 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8093 (__v4si) __B,
8094 (__v4si)
8095 _mm_setzero_si128 (),
8096 __M);
8097}
8098
8099extern __inline __m128i
8100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8101_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8102 __m128i __B)
8103{
8104 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8105 (__v4si) __B,
8106 (__v4si) __W, __M);
8107}
8108
8109extern __inline __m128i
8110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8111_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8112{
8113 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8114 (__v4si) __B,
8115 (__v4si)
8116 _mm_setzero_si128 (),
8117 __M);
8118}
8119
8120extern __inline __m128i
8121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8122_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8123 __m128i __B)
8124{
8125 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8126 (__v4si) __B,
8127 (__v4si) __W, __M);
8128}
8129
8130extern __inline __m128i
8131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8132_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8133{
8134 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8135 (__v4si) __B,
8136 (__v4si)
8137 _mm_setzero_si128 (),
8138 __M);
8139}
8140
8141extern __inline __m128i
8142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8144 __m128i __B)
8145{
8146 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8147 (__v4si) __B,
8148 (__v4si) __W, __M);
8149}
8150
8151extern __inline __m128i
8152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8153_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8154{
8155 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8156 (__v4si) __B,
8157 (__v4si)
8158 _mm_setzero_si128 (),
8159 __M);
8160}
8161
8162extern __inline __m128i
8163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8164_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8165 __m128i __B)
8166{
8167 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8168 (__v4si) __B,
8169 (__v4si) __W, __M);
8170}
8171
/* When __AVX512CD__ is not already defined, save the current options and
   switch the target to avx512vl plus avx512cd for the definitions that
   follow.  __DISABLE_AVX512VLCD__ is defined only on this path; presumably
   a later block tests it to pop the options again (not visible here).  */
#if !defined (__AVX512CD__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* !__AVX512CD__ */
8177
8178extern __inline __m128i
8179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180_mm_broadcastmb_epi64 (__mmask8 __A)
8181{
8182 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8183}
8184
8185extern __inline __m256i
8186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187_mm256_broadcastmb_epi64 (__mmask8 __A)
8188{
8189 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8190}
8191
8192extern __inline __m128i
8193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8194_mm_broadcastmw_epi32 (__mmask16 __A)
8195{
8196 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8197}
8198
8199extern __inline __m256i
8200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201_mm256_broadcastmw_epi32 (__mmask16 __A)
8202{
8203 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8204}
8205
8206extern __inline __m256i
8207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8208_mm256_lzcnt_epi32 (__m256i __A)
8209{
8210 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8211 (__v8si)
8212 _mm256_setzero_si256 (),
8213 (__mmask8) -1);
8214}
8215
8216extern __inline __m256i
8217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8219{
8220 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8221 (__v8si) __W,
8222 (__mmask8) __U);
8223}
8224
8225extern __inline __m256i
8226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8227_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8228{
8229 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8230 (__v8si)
8231 _mm256_setzero_si256 (),
8232 (__mmask8) __U);
8233}
8234
8235extern __inline __m256i
8236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8237_mm256_lzcnt_epi64 (__m256i __A)
8238{
8239 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8240 (__v4di)
8241 _mm256_setzero_si256 (),
8242 (__mmask8) -1);
8243}
8244
8245extern __inline __m256i
8246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8247_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8248{
8249 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8250 (__v4di) __W,
8251 (__mmask8) __U);
8252}
8253
8254extern __inline __m256i
8255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8257{
8258 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8259 (__v4di)
8260 _mm256_setzero_si256 (),
8261 (__mmask8) __U);
8262}
8263
8264extern __inline __m256i
8265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266_mm256_conflict_epi64 (__m256i __A)
8267{
8268 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8269 (__v4di)
8270 _mm256_setzero_si256 (),
c42b0bdf 8271 (__mmask8) -1);
936c0fe4
AI
8272}
8273
8274extern __inline __m256i
8275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8276_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8277{
8278 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8279 (__v4di) __W,
8280 (__mmask8)
8281 __U);
8282}
8283
8284extern __inline __m256i
8285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8286_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8287{
8288 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8289 (__v4di)
8290 _mm256_setzero_si256 (),
8291 (__mmask8)
8292 __U);
8293}
8294
8295extern __inline __m256i
8296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8297_mm256_conflict_epi32 (__m256i __A)
8298{
8299 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8300 (__v8si)
8301 _mm256_setzero_si256 (),
c42b0bdf 8302 (__mmask8) -1);
936c0fe4
AI
8303}
8304
8305extern __inline __m256i
8306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8307_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8308{
8309 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8310 (__v8si) __W,
8311 (__mmask8)
8312 __U);
8313}
8314
8315extern __inline __m256i
8316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8317_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8318{
8319 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8320 (__v8si)
8321 _mm256_setzero_si256 (),
8322 (__mmask8)
8323 __U);
8324}
8325
8326extern __inline __m128i
8327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8328_mm_lzcnt_epi32 (__m128i __A)
8329{
8330 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8331 (__v4si)
8332 _mm_setzero_si128 (),
8333 (__mmask8) -1);
8334}
8335
8336extern __inline __m128i
8337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8338_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8339{
8340 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8341 (__v4si) __W,
8342 (__mmask8) __U);
8343}
8344
8345extern __inline __m128i
8346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8347_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8348{
8349 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8350 (__v4si)
8351 _mm_setzero_si128 (),
8352 (__mmask8) __U);
8353}
8354
8355extern __inline __m128i
8356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8357_mm_lzcnt_epi64 (__m128i __A)
8358{
8359 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8360 (__v2di)
a25a7887 8361 _mm_setzero_si128 (),
936c0fe4
AI
8362 (__mmask8) -1);
8363}
8364
8365extern __inline __m128i
8366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8367_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8368{
8369 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8370 (__v2di) __W,
8371 (__mmask8) __U);
8372}
8373
8374extern __inline __m128i
8375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8377{
8378 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8379 (__v2di)
a25a7887 8380 _mm_setzero_si128 (),
936c0fe4
AI
8381 (__mmask8) __U);
8382}
8383
8384extern __inline __m128i
8385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386_mm_conflict_epi64 (__m128i __A)
8387{
8388 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8389 (__v2di)
a25a7887 8390 _mm_setzero_si128 (),
c42b0bdf 8391 (__mmask8) -1);
936c0fe4
AI
8392}
8393
8394extern __inline __m128i
8395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8396_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8397{
8398 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8399 (__v2di) __W,
8400 (__mmask8)
8401 __U);
8402}
8403
8404extern __inline __m128i
8405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8406_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8407{
8408 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8409 (__v2di)
a25a7887 8410 _mm_setzero_si128 (),
936c0fe4
AI
8411 (__mmask8)
8412 __U);
8413}
8414
8415extern __inline __m128i
8416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8417_mm_conflict_epi32 (__m128i __A)
8418{
8419 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8420 (__v4si)
8421 _mm_setzero_si128 (),
c42b0bdf 8422 (__mmask8) -1);
936c0fe4
AI
8423}
8424
8425extern __inline __m128i
8426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8427_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8428{
8429 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8430 (__v4si) __W,
8431 (__mmask8)
8432 __U);
8433}
8434
8435extern __inline __m128i
8436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8437_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8438{
8439 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8440 (__v4si)
8441 _mm_setzero_si128 (),
8442 (__mmask8)
8443 __U);
8444}
8445
/* Pop the saved target options when __DISABLE_AVX512VLCD__ is defined
   (presumably by a matching push_options earlier in this header; that
   part is not shown here).  */
#if defined (__DISABLE_AVX512VLCD__)
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8449
8450extern __inline __m256d
8451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8452_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8453 __m256d __B)
8454{
8455 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8456 (__v4df) __B,
8457 (__v4df) __W,
8458 (__mmask8) __U);
8459}
8460
8461extern __inline __m256d
8462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8463_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8464{
8465 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8466 (__v4df) __B,
8467 (__v4df)
8468 _mm256_setzero_pd (),
8469 (__mmask8) __U);
8470}
8471
8472extern __inline __m128d
8473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8474_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8475 __m128d __B)
8476{
8477 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8478 (__v2df) __B,
8479 (__v2df) __W,
8480 (__mmask8) __U);
8481}
8482
8483extern __inline __m128d
8484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8486{
8487 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8488 (__v2df) __B,
8489 (__v2df)
8490 _mm_setzero_pd (),
8491 (__mmask8) __U);
8492}
8493
8494extern __inline __m256
8495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8496_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8497 __m256 __B)
8498{
8499 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8500 (__v8sf) __B,
8501 (__v8sf) __W,
8502 (__mmask8) __U);
8503}
8504
8505extern __inline __m256d
8506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8507_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8508 __m256d __B)
8509{
8510 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8511 (__v4df) __B,
8512 (__v4df) __W,
8513 (__mmask8) __U);
8514}
8515
8516extern __inline __m256d
8517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8518_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8519{
8520 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8521 (__v4df) __B,
8522 (__v4df)
8523 _mm256_setzero_pd (),
8524 (__mmask8) __U);
8525}
8526
8527extern __inline __m128d
8528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8529_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8530 __m128d __B)
8531{
8532 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8533 (__v2df) __B,
8534 (__v2df) __W,
8535 (__mmask8) __U);
8536}
8537
8538extern __inline __m128d
8539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8541{
8542 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8543 (__v2df) __B,
8544 (__v2df)
8545 _mm_setzero_pd (),
8546 (__mmask8) __U);
8547}
8548
8549extern __inline __m256
8550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8552 __m256 __B)
8553{
8554 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8555 (__v8sf) __B,
8556 (__v8sf) __W,
8557 (__mmask8) __U);
8558}
8559
8560extern __inline __m256
8561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8562_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8563{
8564 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8565 (__v8sf) __B,
8566 (__v8sf)
8567 _mm256_setzero_ps (),
8568 (__mmask8) __U);
8569}
8570
8571extern __inline __m128
8572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8573_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8574{
8575 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8576 (__v4sf) __B,
8577 (__v4sf) __W,
8578 (__mmask8) __U);
8579}
8580
8581extern __inline __m128
8582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8583_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8584{
8585 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8586 (__v4sf) __B,
8587 (__v4sf)
8588 _mm_setzero_ps (),
8589 (__mmask8) __U);
8590}
8591
8592extern __inline __m128
8593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8594_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8595{
8596 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8597 (__v4sf) __W,
8598 (__mmask8) __U);
8599}
8600
8601extern __inline __m128
8602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8603_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8604{
8605 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8606 (__v4sf)
8607 _mm_setzero_ps (),
8608 (__mmask8) __U);
8609}
8610
8611extern __inline __m256
8612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8613_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8614{
8615 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8616 (__v8sf) __B,
8617 (__v8sf)
8618 _mm256_setzero_ps (),
8619 (__mmask8) __U);
8620}
8621
8622extern __inline __m256
8623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8624_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8625{
8626 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8627 (__v8sf) __W,
8628 (__mmask8) __U);
8629}
8630
8631extern __inline __m256
8632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8634{
8635 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8636 (__v8sf)
8637 _mm256_setzero_ps (),
8638 (__mmask8) __U);
8639}
8640
8641extern __inline __m128
8642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8643_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8644{
8645 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8646 (__v4sf) __B,
8647 (__v4sf) __W,
8648 (__mmask8) __U);
8649}
8650
8651extern __inline __m128
8652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8653_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8654{
8655 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8656 (__v4sf) __B,
8657 (__v4sf)
8658 _mm_setzero_ps (),
8659 (__mmask8) __U);
8660}
8661
8662extern __inline __m256i
8663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8664_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8665 __m128i __B)
8666{
8667 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8668 (__v4si) __B,
8669 (__v8si) __W,
8670 (__mmask8) __U);
8671}
8672
8673extern __inline __m256i
8674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8675_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8676{
8677 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8678 (__v4si) __B,
8679 (__v8si)
8680 _mm256_setzero_si256 (),
8681 (__mmask8) __U);
8682}
8683
8684extern __inline __m128i
8685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8686_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8687 __m128i __B)
8688{
8689 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8690 (__v4si) __B,
8691 (__v4si) __W,
8692 (__mmask8) __U);
8693}
8694
8695extern __inline __m128i
8696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8697_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8698{
8699 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8700 (__v4si) __B,
8701 (__v4si)
8702 _mm_setzero_si128 (),
8703 (__mmask8) __U);
8704}
8705
8706extern __inline __m256i
8707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8708_mm256_sra_epi64 (__m256i __A, __m128i __B)
8709{
8710 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8711 (__v2di) __B,
8712 (__v4di)
8713 _mm256_setzero_si256 (),
8714 (__mmask8) -1);
8715}
8716
8717extern __inline __m256i
8718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8719_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8720 __m128i __B)
8721{
8722 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8723 (__v2di) __B,
8724 (__v4di) __W,
8725 (__mmask8) __U);
8726}
8727
8728extern __inline __m256i
8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8731{
8732 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8733 (__v2di) __B,
8734 (__v4di)
8735 _mm256_setzero_si256 (),
8736 (__mmask8) __U);
8737}
8738
8739extern __inline __m128i
8740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8741_mm_sra_epi64 (__m128i __A, __m128i __B)
8742{
8743 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8744 (__v2di) __B,
8745 (__v2di)
a25a7887 8746 _mm_setzero_si128 (),
936c0fe4
AI
8747 (__mmask8) -1);
8748}
8749
8750extern __inline __m128i
8751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8752_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8753 __m128i __B)
8754{
8755 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8756 (__v2di) __B,
8757 (__v2di) __W,
8758 (__mmask8) __U);
8759}
8760
8761extern __inline __m128i
8762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8763_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8764{
8765 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8766 (__v2di) __B,
8767 (__v2di)
a25a7887 8768 _mm_setzero_si128 (),
936c0fe4
AI
8769 (__mmask8) __U);
8770}
8771
8772extern __inline __m128i
8773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8774_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8775 __m128i __B)
8776{
8777 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8778 (__v4si) __B,
8779 (__v4si) __W,
8780 (__mmask8) __U);
8781}
8782
8783extern __inline __m128i
8784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8785_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8786{
8787 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8788 (__v4si) __B,
8789 (__v4si)
8790 _mm_setzero_si128 (),
8791 (__mmask8) __U);
8792}
8793
8794extern __inline __m128i
8795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8796_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8797 __m128i __B)
8798{
8799 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8800 (__v2di) __B,
8801 (__v2di) __W,
8802 (__mmask8) __U);
8803}
8804
8805extern __inline __m128i
8806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8808{
8809 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8810 (__v2di) __B,
8811 (__v2di)
a25a7887 8812 _mm_setzero_si128 (),
936c0fe4
AI
8813 (__mmask8) __U);
8814}
8815
8816extern __inline __m256i
8817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8818_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8819 __m128i __B)
8820{
8821 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8822 (__v4si) __B,
8823 (__v8si) __W,
8824 (__mmask8) __U);
8825}
8826
8827extern __inline __m256i
8828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8830{
8831 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8832 (__v4si) __B,
8833 (__v8si)
8834 _mm256_setzero_si256 (),
8835 (__mmask8) __U);
8836}
8837
8838extern __inline __m256i
8839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8840_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8841 __m128i __B)
8842{
8843 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8844 (__v2di) __B,
8845 (__v4di) __W,
8846 (__mmask8) __U);
8847}
8848
8849extern __inline __m256i
8850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8851_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8852{
8853 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8854 (__v2di) __B,
8855 (__v4di)
8856 _mm256_setzero_si256 (),
8857 (__mmask8) __U);
8858}
8859
8860extern __inline __m256
8861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8862_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8863 __m256 __Y)
8864{
8865 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8866 (__v8si) __X,
8867 (__v8sf) __W,
8868 (__mmask8) __U);
8869}
8870
8871extern __inline __m256
8872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8873_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8874{
8875 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8876 (__v8si) __X,
8877 (__v8sf)
8878 _mm256_setzero_ps (),
8879 (__mmask8) __U);
8880}
8881
8882extern __inline __m256d
8883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8885{
8886 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8887 (__v4di) __X,
8888 (__v4df)
8889 _mm256_setzero_pd (),
8890 (__mmask8) -1);
8891}
8892
8893extern __inline __m256d
8894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8895_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8896 __m256d __Y)
8897{
8898 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8899 (__v4di) __X,
8900 (__v4df) __W,
8901 (__mmask8) __U);
8902}
8903
8904extern __inline __m256d
8905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8906_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8907{
8908 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8909 (__v4di) __X,
8910 (__v4df)
8911 _mm256_setzero_pd (),
8912 (__mmask8) __U);
8913}
8914
8915extern __inline __m256d
8916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8917_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8918 __m256i __C)
8919{
8920 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8921 (__v4di) __C,
8922 (__v4df) __W,
8923 (__mmask8)
8924 __U);
8925}
8926
8927extern __inline __m256d
8928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8929_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8930{
8931 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8932 (__v4di) __C,
8933 (__v4df)
8934 _mm256_setzero_pd (),
8935 (__mmask8)
8936 __U);
8937}
8938
8939extern __inline __m256
8940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8941_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8942 __m256i __C)
8943{
8944 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8945 (__v8si) __C,
8946 (__v8sf) __W,
8947 (__mmask8) __U);
8948}
8949
8950extern __inline __m256
8951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8952_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8953{
8954 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8955 (__v8si) __C,
8956 (__v8sf)
8957 _mm256_setzero_ps (),
8958 (__mmask8) __U);
8959}
8960
8961extern __inline __m128d
8962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8963_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8964 __m128i __C)
8965{
8966 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8967 (__v2di) __C,
8968 (__v2df) __W,
8969 (__mmask8) __U);
8970}
8971
8972extern __inline __m128d
8973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8975{
8976 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8977 (__v2di) __C,
8978 (__v2df)
8979 _mm_setzero_pd (),
8980 (__mmask8) __U);
8981}
8982
8983extern __inline __m128
8984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8985_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8986 __m128i __C)
8987{
8988 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8989 (__v4si) __C,
8990 (__v4sf) __W,
8991 (__mmask8) __U);
8992}
8993
8994extern __inline __m128
8995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8996_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8997{
8998 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8999 (__v4si) __C,
9000 (__v4sf)
9001 _mm_setzero_ps (),
9002 (__mmask8) __U);
9003}
9004
9005extern __inline __m256i
9006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9007_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9008{
9009 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9010 (__v8si) __B,
9011 (__v8si)
9012 _mm256_setzero_si256 (),
9013 __M);
9014}
9015
9016extern __inline __m256i
9017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9018_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9019{
9020 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9021 (__v4di) __X,
9022 (__v4di)
9023 _mm256_setzero_si256 (),
9024 __M);
9025}
9026
9027extern __inline __m256i
9028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9029_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9030 __m256i __B)
9031{
9032 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9033 (__v8si) __B,
9034 (__v8si) __W, __M);
9035}
9036
9037extern __inline __m128i
9038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9040{
9041 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9042 (__v4si) __B,
9043 (__v4si)
9044 _mm_setzero_si128 (),
9045 __M);
9046}
9047
9048extern __inline __m128i
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9051 __m128i __B)
9052{
9053 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9054 (__v4si) __B,
9055 (__v4si) __W, __M);
9056}
9057
9058extern __inline __m256i
9059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9061 __m256i __Y)
9062{
9063 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9064 (__v8si) __Y,
9065 (__v4di) __W, __M);
9066}
9067
9068extern __inline __m256i
9069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9071{
9072 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9073 (__v8si) __Y,
9074 (__v4di)
9075 _mm256_setzero_si256 (),
9076 __M);
9077}
9078
9079extern __inline __m128i
9080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9081_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9082 __m128i __Y)
9083{
9084 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9085 (__v4si) __Y,
9086 (__v2di) __W, __M);
9087}
9088
9089extern __inline __m128i
9090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9091_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9092{
9093 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9094 (__v4si) __Y,
9095 (__v2di)
9096 _mm_setzero_si128 (),
9097 __M);
9098}
9099
395a191d
SP
9100extern __inline __m256i
9101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9102_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9103{
9104 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9105 (__v4di) __X,
9106 (__v4di)
9107 _mm256_setzero_si256 (),
9108 (__mmask8) -1);
9109}
9110
936c0fe4
AI
9111extern __inline __m256i
9112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9113_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9114 __m256i __Y)
9115{
9116 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9117 (__v4di) __X,
9118 (__v4di) __W,
9119 __M);
9120}
9121
9122extern __inline __m256i
9123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9124_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9125 __m256i __Y)
9126{
9127 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9128 (__v8si) __Y,
9129 (__v4di) __W, __M);
9130}
9131
9132extern __inline __m256i
9133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9134_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9135{
9136 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9137 (__v8si) __X,
9138 (__v8si)
9139 _mm256_setzero_si256 (),
9140 __M);
9141}
9142
9143extern __inline __m256i
9144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9145_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9146{
9147 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9148 (__v8si) __Y,
9149 (__v4di)
9150 _mm256_setzero_si256 (),
9151 __M);
9152}
9153
9154extern __inline __m128i
9155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9156_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9157 __m128i __Y)
9158{
9159 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9160 (__v4si) __Y,
9161 (__v2di) __W, __M);
9162}
9163
9164extern __inline __m128i
9165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9166_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9167{
9168 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9169 (__v4si) __Y,
9170 (__v2di)
9171 _mm_setzero_si128 (),
9172 __M);
9173}
9174
395a191d
SP
9175extern __inline __m256i
9176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9177_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9178{
9179 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9180 (__v8si) __X,
9181 (__v8si)
9182 _mm256_setzero_si256 (),
9183 (__mmask8) -1);
9184}
9185
936c0fe4
AI
9186extern __inline __m256i
9187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9188_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9189 __m256i __Y)
9190{
9191 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9192 (__v8si) __X,
9193 (__v8si) __W,
9194 __M);
9195}
9196
6b62f323
JJ
9197extern __inline __mmask8
9198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9199_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9200{
6b62f323
JJ
9201 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9202 (__v8si) __Y, 4,
936c0fe4
AI
9203 (__mmask8) __M);
9204}
9205
6b62f323
JJ
9206extern __inline __mmask8
9207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9208_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9209{
6b62f323
JJ
9210 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9211 (__v8si) __Y, 4,
9212 (__mmask8) -1);
936c0fe4
AI
9213}
9214
6b62f323
JJ
9215extern __inline __mmask8
9216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9217_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9218{
6b62f323
JJ
9219 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9220 (__v8si) __Y, 1,
9221 (__mmask8) __M);
936c0fe4
AI
9222}
9223
6b62f323
JJ
9224extern __inline __mmask8
9225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9226_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9227{
6b62f323
JJ
9228 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9229 (__v8si) __Y, 1,
9230 (__mmask8) -1);
936c0fe4
AI
9231}
9232
6b62f323
JJ
9233extern __inline __mmask8
9234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9236{
6b62f323
JJ
9237 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9238 (__v8si) __Y, 5,
9239 (__mmask8) __M);
936c0fe4
AI
9240}
9241
6b62f323
JJ
9242extern __inline __mmask8
9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9245{
6b62f323
JJ
9246 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9247 (__v8si) __Y, 5,
9248 (__mmask8) -1);
936c0fe4
AI
9249}
9250
6b62f323
JJ
9251extern __inline __mmask8
9252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9253_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9254{
6b62f323
JJ
9255 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9256 (__v8si) __Y, 2,
9257 (__mmask8) __M);
936c0fe4
AI
9258}
9259
6b62f323
JJ
9260extern __inline __mmask8
9261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9262_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9263{
6b62f323
JJ
9264 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9265 (__v8si) __Y, 2,
9266 (__mmask8) -1);
936c0fe4
AI
9267}
9268
6b62f323
JJ
9269extern __inline __mmask8
9270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9271_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9272{
6b62f323
JJ
9273 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9274 (__v4di) __Y, 4,
9275 (__mmask8) __M);
936c0fe4
AI
9276}
9277
6b62f323
JJ
9278extern __inline __mmask8
9279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9280_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9281{
6b62f323
JJ
9282 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9283 (__v4di) __Y, 4,
9284 (__mmask8) -1);
936c0fe4
AI
9285}
9286
6b62f323
JJ
9287extern __inline __mmask8
9288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9289_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9290{
6b62f323
JJ
9291 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9292 (__v4di) __Y, 1,
9293 (__mmask8) __M);
936c0fe4
AI
9294}
9295
6b62f323
JJ
9296extern __inline __mmask8
9297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9298_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9299{
6b62f323
JJ
9300 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9301 (__v4di) __Y, 1,
9302 (__mmask8) -1);
936c0fe4
AI
9303}
9304
6b62f323
JJ
9305extern __inline __mmask8
9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9308{
6b62f323
JJ
9309 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9310 (__v4di) __Y, 5,
9311 (__mmask8) __M);
936c0fe4
AI
9312}
9313
6b62f323
JJ
9314extern __inline __mmask8
9315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9316_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9317{
6b62f323
JJ
9318 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9319 (__v4di) __Y, 5,
9320 (__mmask8) -1);
936c0fe4
AI
9321}
9322
6b62f323
JJ
9323extern __inline __mmask8
9324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9325_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9326{
6b62f323
JJ
9327 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9328 (__v4di) __Y, 2,
9329 (__mmask8) __M);
936c0fe4
AI
9330}
9331
6b62f323
JJ
9332extern __inline __mmask8
9333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9334_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9335{
6b62f323
JJ
9336 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9337 (__v4di) __Y, 2,
9338 (__mmask8) -1);
936c0fe4
AI
9339}
9340
6b62f323
JJ
9341extern __inline __mmask8
9342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9343_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9344{
6b62f323
JJ
9345 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9346 (__v8si) __Y, 4,
9347 (__mmask8) __M);
936c0fe4
AI
9348}
9349
6b62f323
JJ
9350extern __inline __mmask8
9351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9352_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9353{
9354 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9355 (__v8si) __Y, 4,
9356 (__mmask8) -1);
936c0fe4
AI
9357}
9358
6b62f323
JJ
9359extern __inline __mmask8
9360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9361_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9362{
6b62f323
JJ
9363 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9364 (__v8si) __Y, 1,
9365 (__mmask8) __M);
936c0fe4
AI
9366}
9367
6b62f323
JJ
9368extern __inline __mmask8
9369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9370_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9371{
6b62f323
JJ
9372 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9373 (__v8si) __Y, 1,
9374 (__mmask8) -1);
936c0fe4
AI
9375}
9376
6b62f323
JJ
9377extern __inline __mmask8
9378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9379_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9380{
6b62f323
JJ
9381 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9382 (__v8si) __Y, 5,
9383 (__mmask8) __M);
936c0fe4
AI
9384}
9385
6b62f323
JJ
9386extern __inline __mmask8
9387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9389{
6b62f323
JJ
9390 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9391 (__v8si) __Y, 5,
9392 (__mmask8) -1);
936c0fe4
AI
9393}
9394
6b62f323
JJ
9395extern __inline __mmask8
9396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9397_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9398{
6b62f323
JJ
9399 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9400 (__v8si) __Y, 2,
9401 (__mmask8) __M);
936c0fe4
AI
9402}
9403
6b62f323
JJ
9404extern __inline __mmask8
9405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9406_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9407{
6b62f323
JJ
9408 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9409 (__v8si) __Y, 2,
9410 (__mmask8) -1);
936c0fe4
AI
9411}
9412
6b62f323
JJ
9413extern __inline __mmask8
9414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9416{
6b62f323
JJ
9417 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9418 (__v4di) __Y, 4,
9419 (__mmask8) __M);
936c0fe4
AI
9420}
9421
6b62f323
JJ
9422extern __inline __mmask8
9423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9424_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9425{
6b62f323
JJ
9426 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9427 (__v4di) __Y, 4,
9428 (__mmask8) -1);
936c0fe4
AI
9429}
9430
6b62f323
JJ
9431extern __inline __mmask8
9432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9433_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9434{
6b62f323
JJ
9435 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9436 (__v4di) __Y, 1,
9437 (__mmask8) __M);
936c0fe4
AI
9438}
9439
6b62f323
JJ
9440extern __inline __mmask8
9441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9442_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9443{
6b62f323
JJ
9444 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9445 (__v4di) __Y, 1,
9446 (__mmask8) -1);
936c0fe4
AI
9447}
9448
6b62f323
JJ
9449extern __inline __mmask8
9450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9451_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9452{
6b62f323
JJ
9453 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9454 (__v4di) __Y, 5,
9455 (__mmask8) __M);
936c0fe4
AI
9456}
9457
6b62f323
JJ
9458extern __inline __mmask8
9459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9460_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9461{
6b62f323
JJ
9462 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9463 (__v4di) __Y, 5,
9464 (__mmask8) -1);
936c0fe4
AI
9465}
9466
6b62f323
JJ
9467extern __inline __mmask8
9468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9469_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9470{
6b62f323
JJ
9471 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9472 (__v4di) __Y, 2,
9473 (__mmask8) __M);
936c0fe4
AI
9474}
9475
6b62f323
JJ
9476extern __inline __mmask8
9477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9478_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9479{
6b62f323
JJ
9480 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9481 (__v4di) __Y, 2,
9482 (__mmask8) -1);
936c0fe4
AI
9483}
9484
6b62f323
JJ
9485extern __inline __mmask8
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9488{
6b62f323
JJ
9489 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9490 (__v4si) __Y, 4,
9491 (__mmask8) __M);
936c0fe4
AI
9492}
9493
6b62f323
JJ
9494extern __inline __mmask8
9495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9496_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9497{
6b62f323
JJ
9498 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9499 (__v4si) __Y, 4,
9500 (__mmask8) -1);
936c0fe4
AI
9501}
9502
6b62f323
JJ
9503extern __inline __mmask8
9504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9505_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9506{
6b62f323
JJ
9507 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9508 (__v4si) __Y, 1,
9509 (__mmask8) __M);
936c0fe4
AI
9510}
9511
6b62f323
JJ
9512extern __inline __mmask8
9513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9514_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9515{
6b62f323
JJ
9516 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9517 (__v4si) __Y, 1,
9518 (__mmask8) -1);
936c0fe4
AI
9519}
9520
6b62f323
JJ
9521extern __inline __mmask8
9522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9523_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9524{
6b62f323
JJ
9525 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9526 (__v4si) __Y, 5,
9527 (__mmask8) __M);
936c0fe4
AI
9528}
9529
6b62f323
JJ
9530extern __inline __mmask8
9531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9532_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9533{
6b62f323
JJ
9534 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9535 (__v4si) __Y, 5,
9536 (__mmask8) -1);
936c0fe4
AI
9537}
9538
6b62f323
JJ
9539extern __inline __mmask8
9540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9541_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9542{
6b62f323
JJ
9543 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9544 (__v4si) __Y, 2,
9545 (__mmask8) __M);
936c0fe4
AI
9546}
9547
6b62f323
JJ
9548extern __inline __mmask8
9549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9550_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9551{
6b62f323
JJ
9552 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9553 (__v4si) __Y, 2,
9554 (__mmask8) -1);
936c0fe4
AI
9555}
9556
6b62f323
JJ
9557extern __inline __mmask8
9558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9559_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9560{
6b62f323
JJ
9561 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9562 (__v2di) __Y, 4,
9563 (__mmask8) __M);
936c0fe4
AI
9564}
9565
6b62f323
JJ
9566extern __inline __mmask8
9567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9568_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9569{
6b62f323
JJ
9570 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9571 (__v2di) __Y, 4,
9572 (__mmask8) -1);
936c0fe4
AI
9573}
9574
6b62f323
JJ
9575extern __inline __mmask8
9576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9577_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9578{
6b62f323
JJ
9579 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9580 (__v2di) __Y, 1,
9581 (__mmask8) __M);
936c0fe4
AI
9582}
9583
6b62f323
JJ
9584extern __inline __mmask8
9585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9587{
6b62f323
JJ
9588 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9589 (__v2di) __Y, 1,
9590 (__mmask8) -1);
936c0fe4
AI
9591}
9592
6b62f323
JJ
9593extern __inline __mmask8
9594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9595_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9596{
6b62f323
JJ
9597 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9598 (__v2di) __Y, 5,
9599 (__mmask8) __M);
936c0fe4
AI
9600}
9601
6b62f323
JJ
9602extern __inline __mmask8
9603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9604_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9605{
6b62f323
JJ
9606 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9607 (__v2di) __Y, 5,
9608 (__mmask8) -1);
936c0fe4
AI
9609}
9610
6b62f323
JJ
9611extern __inline __mmask8
9612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9613_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9614{
6b62f323
JJ
9615 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9616 (__v2di) __Y, 2,
9617 (__mmask8) __M);
936c0fe4
AI
9618}
9619
6b62f323
JJ
9620extern __inline __mmask8
9621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9623{
6b62f323
JJ
9624 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9625 (__v2di) __Y, 2,
9626 (__mmask8) -1);
936c0fe4
AI
9627}
9628
6b62f323
JJ
9629extern __inline __mmask8
9630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9631_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9632{
6b62f323
JJ
9633 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9634 (__v4si) __Y, 4,
9635 (__mmask8) __M);
936c0fe4
AI
9636}
9637
6b62f323
JJ
9638extern __inline __mmask8
9639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9640_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9641{
9642 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9643 (__v4si) __Y, 4,
9644 (__mmask8) -1);
9645}
9646
9647extern __inline __mmask8
9648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9649_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9650{
9651 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9652 (__v4si) __Y, 1,
9653 (__mmask8) __M);
9654}
9655
9656extern __inline __mmask8
9657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9658_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9659{
6b62f323
JJ
9660 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9661 (__v4si) __Y, 1,
9662 (__mmask8) -1);
936c0fe4
AI
9663}
9664
6b62f323
JJ
9665extern __inline __mmask8
9666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9667_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9668{
6b62f323
JJ
9669 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9670 (__v4si) __Y, 5,
9671 (__mmask8) __M);
936c0fe4
AI
9672}
9673
6b62f323
JJ
9674extern __inline __mmask8
9675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9676_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9677{
6b62f323
JJ
9678 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9679 (__v4si) __Y, 5,
9680 (__mmask8) -1);
936c0fe4
AI
9681}
9682
6b62f323
JJ
9683extern __inline __mmask8
9684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9685_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9686{
6b62f323
JJ
9687 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9688 (__v4si) __Y, 2,
9689 (__mmask8) __M);
936c0fe4
AI
9690}
9691
6b62f323
JJ
9692extern __inline __mmask8
9693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9695{
6b62f323
JJ
9696 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9697 (__v4si) __Y, 2,
9698 (__mmask8) -1);
936c0fe4
AI
9699}
9700
6b62f323
JJ
9701extern __inline __mmask8
9702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9703_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9704{
6b62f323
JJ
9705 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9706 (__v2di) __Y, 4,
9707 (__mmask8) __M);
936c0fe4
AI
9708}
9709
6b62f323
JJ
9710extern __inline __mmask8
9711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9712_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9713{
6b62f323
JJ
9714 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9715 (__v2di) __Y, 4,
9716 (__mmask8) -1);
936c0fe4
AI
9717}
9718
6b62f323
JJ
9719extern __inline __mmask8
9720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9721_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9722{
6b62f323
JJ
9723 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9724 (__v2di) __Y, 1,
9725 (__mmask8) __M);
936c0fe4
AI
9726}
9727
6b62f323
JJ
9728extern __inline __mmask8
9729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9730_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9731{
6b62f323
JJ
9732 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9733 (__v2di) __Y, 1,
9734 (__mmask8) -1);
936c0fe4
AI
9735}
9736
6b62f323
JJ
9737extern __inline __mmask8
9738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9740{
6b62f323
JJ
9741 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9742 (__v2di) __Y, 5,
9743 (__mmask8) __M);
936c0fe4
AI
9744}
9745
6b62f323
JJ
9746extern __inline __mmask8
9747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9748_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9749{
6b62f323
JJ
9750 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9751 (__v2di) __Y, 5,
9752 (__mmask8) -1);
936c0fe4
AI
9753}
9754
6b62f323
JJ
9755extern __inline __mmask8
9756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9757_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9758{
6b62f323
JJ
9759 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9760 (__v2di) __Y, 2,
9761 (__mmask8) __M);
936c0fe4
AI
9762}
9763
6b62f323
JJ
9764extern __inline __mmask8
9765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9766_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9767{
6b62f323
JJ
9768 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9769 (__v2di) __Y, 2,
9770 (__mmask8) -1);
936c0fe4
AI
9771}
9772
6b62f323 9773#ifdef __OPTIMIZE__
395a191d
SP
9774extern __inline __m256i
9775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9776_mm256_permutex_epi64 (__m256i __X, const int __I)
9777{
9778 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9779 __I,
9780 (__v4di)
9781 _mm256_setzero_si256(),
9782 (__mmask8) -1);
9783}
9784
6b62f323 9785extern __inline __m256i
936c0fe4 9786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9787_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9788 __m256i __X, const int __I)
936c0fe4 9789{
6b62f323
JJ
9790 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9791 __I,
9792 (__v4di) __W,
9793 (__mmask8) __M);
936c0fe4
AI
9794}
9795
6b62f323 9796extern __inline __m256i
936c0fe4 9797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9798_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
936c0fe4 9799{
6b62f323
JJ
9800 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9801 __I,
9802 (__v4di)
9803 _mm256_setzero_si256 (),
9804 (__mmask8) __M);
936c0fe4
AI
9805}
9806
6b62f323 9807extern __inline __m256d
936c0fe4 9808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9809_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9810 __m256d __B, const int __imm)
936c0fe4 9811{
6b62f323
JJ
9812 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9813 (__v4df) __B, __imm,
9814 (__v4df) __W,
9815 (__mmask8) __U);
936c0fe4
AI
9816}
9817
6b62f323 9818extern __inline __m256d
936c0fe4 9819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9820_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9821 const int __imm)
936c0fe4 9822{
6b62f323
JJ
9823 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9824 (__v4df) __B, __imm,
9825 (__v4df)
9826 _mm256_setzero_pd (),
9827 (__mmask8) __U);
936c0fe4
AI
9828}
9829
6b62f323 9830extern __inline __m128d
936c0fe4 9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9832_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9833 __m128d __B, const int __imm)
936c0fe4 9834{
6b62f323
JJ
9835 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9836 (__v2df) __B, __imm,
9837 (__v2df) __W,
9838 (__mmask8) __U);
936c0fe4
AI
9839}
9840
6b62f323 9841extern __inline __m128d
936c0fe4 9842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9843_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9844 const int __imm)
936c0fe4 9845{
6b62f323
JJ
9846 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9847 (__v2df) __B, __imm,
9848 (__v2df)
9849 _mm_setzero_pd (),
9850 (__mmask8) __U);
936c0fe4
AI
9851}
9852
9853extern __inline __m256
9854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9855_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9856 __m256 __B, const int __imm)
936c0fe4 9857{
6b62f323
JJ
9858 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9859 (__v8sf) __B, __imm,
9860 (__v8sf) __W,
9861 (__mmask8) __U);
936c0fe4
AI
9862}
9863
6b62f323 9864extern __inline __m256
936c0fe4 9865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9866_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9867 const int __imm)
936c0fe4 9868{
6b62f323
JJ
9869 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9870 (__v8sf) __B, __imm,
9871 (__v8sf)
9872 _mm256_setzero_ps (),
9873 (__mmask8) __U);
936c0fe4
AI
9874}
9875
6b62f323 9876extern __inline __m128
936c0fe4 9877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9878_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9879 const int __imm)
936c0fe4 9880{
6b62f323
JJ
9881 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9882 (__v4sf) __B, __imm,
9883 (__v4sf) __W,
9884 (__mmask8) __U);
936c0fe4
AI
9885}
9886
6b62f323 9887extern __inline __m128
936c0fe4 9888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9889_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9890 const int __imm)
936c0fe4 9891{
6b62f323
JJ
9892 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9893 (__v4sf) __B, __imm,
9894 (__v4sf)
9895 _mm_setzero_ps (),
9896 (__mmask8) __U);
936c0fe4
AI
9897}
9898
6b62f323 9899extern __inline __m256i
936c0fe4 9900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9901_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
936c0fe4 9902{
6b62f323
JJ
9903 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9904 (__v4si) __B,
9905 __imm,
9906 (__v8si)
9907 _mm256_setzero_si256 (),
9908 (__mmask8) -1);
936c0fe4
AI
9909}
9910
6b62f323 9911extern __inline __m256i
936c0fe4 9912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9913_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9914 __m128i __B, const int __imm)
936c0fe4 9915{
6b62f323
JJ
9916 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9917 (__v4si) __B,
9918 __imm,
9919 (__v8si) __W,
9920 (__mmask8)
9921 __U);
936c0fe4
AI
9922}
9923
6b62f323 9924extern __inline __m256i
936c0fe4 9925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9926_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9927 const int __imm)
936c0fe4 9928{
6b62f323
JJ
9929 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9930 (__v4si) __B,
9931 __imm,
9932 (__v8si)
9933 _mm256_setzero_si256 (),
9934 (__mmask8)
9935 __U);
936c0fe4
AI
9936}
9937
6b62f323 9938extern __inline __m256
936c0fe4 9939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9940_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
936c0fe4 9941{
6b62f323
JJ
9942 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9943 (__v4sf) __B,
936c0fe4 9944 __imm,
6b62f323
JJ
9945 (__v8sf)
9946 _mm256_setzero_ps (),
936c0fe4
AI
9947 (__mmask8) -1);
9948}
9949
6b62f323 9950extern __inline __m256
936c0fe4 9951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9952_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9953 __m128 __B, const int __imm)
936c0fe4 9954{
6b62f323
JJ
9955 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9956 (__v4sf) __B,
936c0fe4 9957 __imm,
6b62f323 9958 (__v8sf) __W,
936c0fe4
AI
9959 (__mmask8) __U);
9960}
9961
6b62f323 9962extern __inline __m256
936c0fe4 9963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9964_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9965 const int __imm)
936c0fe4 9966{
6b62f323
JJ
9967 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9968 (__v4sf) __B,
936c0fe4 9969 __imm,
6b62f323
JJ
9970 (__v8sf)
9971 _mm256_setzero_ps (),
936c0fe4
AI
9972 (__mmask8) __U);
9973}
9974
6b62f323 9975extern __inline __m128i
936c0fe4 9976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9977_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
936c0fe4 9978{
6b62f323
JJ
9979 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9980 __imm,
9981 (__v4si)
9982 _mm_setzero_si128 (),
9983 (__mmask8) -1);
936c0fe4
AI
9984}
9985
6b62f323 9986extern __inline __m128i
936c0fe4 9987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9988_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9989 const int __imm)
936c0fe4 9990{
6b62f323
JJ
9991 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9992 __imm,
9993 (__v4si) __W,
9994 (__mmask8)
9995 __U);
936c0fe4
AI
9996}
9997
6b62f323 9998extern __inline __m128i
936c0fe4 9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10000_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10001 const int __imm)
936c0fe4 10002{
6b62f323
JJ
10003 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10004 __imm,
10005 (__v4si)
10006 _mm_setzero_si128 (),
10007 (__mmask8)
10008 __U);
936c0fe4
AI
10009}
10010
10011extern __inline __m128
10012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10013_mm256_extractf32x4_ps (__m256 __A, const int __imm)
936c0fe4 10014{
6b62f323
JJ
10015 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10016 __imm,
10017 (__v4sf)
10018 _mm_setzero_ps (),
10019 (__mmask8) -1);
936c0fe4
AI
10020}
10021
10022extern __inline __m128
10023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10024_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10025 const int __imm)
936c0fe4 10026{
6b62f323
JJ
10027 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10028 __imm,
10029 (__v4sf) __W,
10030 (__mmask8)
10031 __U);
936c0fe4
AI
10032}
10033
10034extern __inline __m128
10035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10036_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10037 const int __imm)
10038{
10039 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10040 __imm,
10041 (__v4sf)
10042 _mm_setzero_ps (),
10043 (__mmask8)
10044 __U);
10045}
10046
10047extern __inline __m256i
10048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10049_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10050{
10051 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10052 (__v4di) __B,
10053 __imm,
10054 (__v4di)
10055 _mm256_setzero_si256 (),
10056 (__mmask8) -1);
10057}
10058
10059extern __inline __m256i
10060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10061_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10062 __m256i __B, const int __imm)
936c0fe4 10063{
6b62f323
JJ
10064 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10065 (__v4di) __B,
10066 __imm,
10067 (__v4di) __W,
10068 (__mmask8) __U);
936c0fe4
AI
10069}
10070
6b62f323 10071extern __inline __m256i
936c0fe4 10072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10073_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10074 const int __imm)
936c0fe4 10075{
6b62f323
JJ
10076 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10077 (__v4di) __B,
10078 __imm,
10079 (__v4di)
10080 _mm256_setzero_si256 (),
10081 (__mmask8) __U);
936c0fe4
AI
10082}
10083
6b62f323 10084extern __inline __m256i
936c0fe4 10085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10086_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 10087{
6b62f323
JJ
10088 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10089 (__v8si) __B,
10090 __imm,
10091 (__v8si)
10092 _mm256_setzero_si256 (),
10093 (__mmask8) -1);
936c0fe4
AI
10094}
10095
6b62f323 10096extern __inline __m256i
936c0fe4 10097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10098_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10099 __m256i __B, const int __imm)
936c0fe4 10100{
6b62f323
JJ
10101 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10102 (__v8si) __B,
10103 __imm,
10104 (__v8si) __W,
10105 (__mmask8) __U);
936c0fe4
AI
10106}
10107
6b62f323 10108extern __inline __m256i
936c0fe4 10109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10110_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10111 const int __imm)
936c0fe4 10112{
6b62f323
JJ
10113 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10114 (__v8si) __B,
10115 __imm,
10116 (__v8si)
10117 _mm256_setzero_si256 (),
10118 (__mmask8) __U);
936c0fe4
AI
10119}
10120
6b62f323 10121extern __inline __m256d
936c0fe4 10122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10123_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
936c0fe4 10124{
6b62f323
JJ
10125 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10126 (__v4df) __B,
10127 __imm,
10128 (__v4df)
10129 _mm256_setzero_pd (),
10130 (__mmask8) -1);
936c0fe4
AI
10131}
10132
6b62f323 10133extern __inline __m256d
936c0fe4 10134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10135_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10136 __m256d __B, const int __imm)
936c0fe4 10137{
6b62f323
JJ
10138 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10139 (__v4df) __B,
10140 __imm,
10141 (__v4df) __W,
10142 (__mmask8) __U);
936c0fe4
AI
10143}
10144
6b62f323 10145extern __inline __m256d
936c0fe4 10146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10147_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10148 const int __imm)
936c0fe4 10149{
6b62f323
JJ
10150 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10151 (__v4df) __B,
10152 __imm,
10153 (__v4df)
10154 _mm256_setzero_pd (),
10155 (__mmask8) __U);
936c0fe4
AI
10156}
10157
6b62f323 10158extern __inline __m256
936c0fe4 10159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10160_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
936c0fe4 10161{
6b62f323
JJ
10162 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10163 (__v8sf) __B,
10164 __imm,
10165 (__v8sf)
10166 _mm256_setzero_ps (),
10167 (__mmask8) -1);
936c0fe4
AI
10168}
10169
6b62f323 10170extern __inline __m256
936c0fe4 10171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10172_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10173 __m256 __B, const int __imm)
936c0fe4 10174{
6b62f323
JJ
10175 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10176 (__v8sf) __B,
10177 __imm,
10178 (__v8sf) __W,
10179 (__mmask8) __U);
936c0fe4
AI
10180}
10181
6b62f323 10182extern __inline __m256
936c0fe4 10183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10184_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10185 const int __imm)
936c0fe4 10186{
6b62f323
JJ
10187 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10188 (__v8sf) __B,
10189 __imm,
10190 (__v8sf)
10191 _mm256_setzero_ps (),
10192 (__mmask8) __U);
936c0fe4
AI
10193}
10194
6b62f323 10195extern __inline __m256d
936c0fe4 10196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10197_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10198 const int __imm)
936c0fe4 10199{
6b62f323
JJ
10200 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10201 (__v4df) __B,
10202 (__v4di) __C,
10203 __imm,
10204 (__mmask8) -1);
936c0fe4
AI
10205}
10206
6b62f323 10207extern __inline __m256d
936c0fe4 10208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10209_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10210 __m256i __C, const int __imm)
936c0fe4 10211{
6b62f323
JJ
10212 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10213 (__v4df) __B,
10214 (__v4di) __C,
10215 __imm,
10216 (__mmask8) __U);
936c0fe4
AI
10217}
10218
10219extern __inline __m256d
10220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10221_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10222 __m256i __C, const int __imm)
936c0fe4 10223{
6b62f323
JJ
10224 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10225 (__v4df) __B,
10226 (__v4di) __C,
10227 __imm,
10228 (__mmask8) __U);
936c0fe4
AI
10229}
10230
6b62f323 10231extern __inline __m256
936c0fe4 10232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10233_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10234 const int __imm)
936c0fe4 10235{
6b62f323
JJ
10236 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10237 (__v8sf) __B,
10238 (__v8si) __C,
10239 __imm,
10240 (__mmask8) -1);
936c0fe4
AI
10241}
10242
6b62f323 10243extern __inline __m256
936c0fe4 10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10245_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10246 __m256i __C, const int __imm)
936c0fe4 10247{
6b62f323
JJ
10248 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10249 (__v8sf) __B,
10250 (__v8si) __C,
10251 __imm,
10252 (__mmask8) __U);
936c0fe4
AI
10253}
10254
6b62f323 10255extern __inline __m256
936c0fe4 10256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10257_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10258 __m256i __C, const int __imm)
936c0fe4 10259{
6b62f323
JJ
10260 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10261 (__v8sf) __B,
10262 (__v8si) __C,
10263 __imm,
10264 (__mmask8) __U);
936c0fe4
AI
10265}
10266
6b62f323 10267extern __inline __m128d
936c0fe4 10268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10269_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10270 const int __imm)
936c0fe4 10271{
6b62f323
JJ
10272 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10273 (__v2df) __B,
10274 (__v2di) __C,
10275 __imm,
10276 (__mmask8) -1);
936c0fe4
AI
10277}
10278
6b62f323 10279extern __inline __m128d
936c0fe4 10280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10281_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10282 __m128i __C, const int __imm)
936c0fe4 10283{
6b62f323
JJ
10284 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10285 (__v2df) __B,
10286 (__v2di) __C,
10287 __imm,
10288 (__mmask8) __U);
936c0fe4
AI
10289}
10290
6b62f323 10291extern __inline __m128d
936c0fe4 10292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10293_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10294 __m128i __C, const int __imm)
936c0fe4 10295{
6b62f323
JJ
10296 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10297 (__v2df) __B,
10298 (__v2di) __C,
10299 __imm,
10300 (__mmask8) __U);
936c0fe4
AI
10301}
10302
6b62f323 10303extern __inline __m128
936c0fe4 10304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10305_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
936c0fe4 10306{
6b62f323
JJ
10307 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10308 (__v4sf) __B,
10309 (__v4si) __C,
10310 __imm,
10311 (__mmask8) -1);
936c0fe4
AI
10312}
10313
6b62f323 10314extern __inline __m128
936c0fe4 10315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10316_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10317 __m128i __C, const int __imm)
936c0fe4 10318{
6b62f323
JJ
10319 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10320 (__v4sf) __B,
10321 (__v4si) __C,
10322 __imm,
10323 (__mmask8) __U);
936c0fe4
AI
10324}
10325
6b62f323 10326extern __inline __m128
936c0fe4 10327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10328_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10329 __m128i __C, const int __imm)
936c0fe4 10330{
6b62f323
JJ
10331 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10332 (__v4sf) __B,
10333 (__v4si) __C,
10334 __imm,
10335 (__mmask8) __U);
936c0fe4
AI
10336}
10337
6b62f323 10338extern __inline __m256i
936c0fe4 10339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10340_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10341 const int __imm)
936c0fe4 10342{
6b62f323
JJ
10343 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10344 (__v8si) __W,
10345 (__mmask8) __U);
936c0fe4
AI
10346}
10347
6b62f323 10348extern __inline __m256i
936c0fe4 10349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10350_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10351{
6b62f323
JJ
10352 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10353 (__v8si)
10354 _mm256_setzero_si256 (),
10355 (__mmask8) __U);
936c0fe4
AI
10356}
10357
6b62f323 10358extern __inline __m128i
936c0fe4 10359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10360_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10361 const int __imm)
936c0fe4 10362{
6b62f323
JJ
10363 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10364 (__v4si) __W,
10365 (__mmask8) __U);
936c0fe4
AI
10366}
10367
6b62f323 10368extern __inline __m128i
936c0fe4 10369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10370_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10371{
6b62f323
JJ
10372 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10373 (__v4si)
10374 _mm_setzero_si128 (),
10375 (__mmask8) __U);
936c0fe4
AI
10376}
10377
6b62f323 10378extern __inline __m256i
936c0fe4 10379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10380_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10381 const int __imm)
936c0fe4 10382{
6b62f323
JJ
10383 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10384 (__v4di) __W,
10385 (__mmask8) __U);
936c0fe4
AI
10386}
10387
6b62f323 10388extern __inline __m256i
936c0fe4 10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10390_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10391{
6b62f323
JJ
10392 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10393 (__v4di)
10394 _mm256_setzero_si256 (),
10395 (__mmask8) __U);
936c0fe4
AI
10396}
10397
6b62f323 10398extern __inline __m128i
936c0fe4 10399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10400_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10401 const int __imm)
936c0fe4 10402{
6b62f323
JJ
10403 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10404 (__v2di) __W,
10405 (__mmask8) __U);
936c0fe4
AI
10406}
10407
6b62f323 10408extern __inline __m128i
936c0fe4 10409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10410_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10411{
6b62f323
JJ
10412 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10413 (__v2di)
10414 _mm_setzero_si128 (),
10415 (__mmask8) __U);
936c0fe4
AI
10416}
10417
6b62f323 10418extern __inline __m256i
936c0fe4 10419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10420_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10421 const int __imm)
936c0fe4 10422{
6b62f323
JJ
10423 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10424 (__v4di) __B,
10425 (__v4di) __C, __imm,
10426 (__mmask8) -1);
936c0fe4
AI
10427}
10428
6b62f323 10429extern __inline __m256i
936c0fe4 10430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10431_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10432 __m256i __B, __m256i __C,
10433 const int __imm)
936c0fe4 10434{
6b62f323
JJ
10435 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10436 (__v4di) __B,
10437 (__v4di) __C, __imm,
10438 (__mmask8) __U);
936c0fe4
AI
10439}
10440
6b62f323 10441extern __inline __m256i
936c0fe4 10442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10443_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10444 __m256i __B, __m256i __C,
10445 const int __imm)
936c0fe4 10446{
6b62f323
JJ
10447 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10448 (__v4di) __B,
10449 (__v4di) __C,
10450 __imm,
10451 (__mmask8) __U);
936c0fe4
AI
10452}
10453
6b62f323 10454extern __inline __m256i
936c0fe4 10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10456_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10457 const int __imm)
936c0fe4 10458{
6b62f323
JJ
10459 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10460 (__v8si) __B,
10461 (__v8si) __C, __imm,
10462 (__mmask8) -1);
936c0fe4
AI
10463}
10464
6b62f323 10465extern __inline __m256i
936c0fe4 10466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10467_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10468 __m256i __B, __m256i __C,
10469 const int __imm)
936c0fe4 10470{
6b62f323
JJ
10471 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10472 (__v8si) __B,
10473 (__v8si) __C, __imm,
10474 (__mmask8) __U);
936c0fe4
AI
10475}
10476
6b62f323 10477extern __inline __m256i
936c0fe4 10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10479_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10480 __m256i __B, __m256i __C,
10481 const int __imm)
936c0fe4 10482{
6b62f323
JJ
10483 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10484 (__v8si) __B,
10485 (__v8si) __C,
10486 __imm,
10487 (__mmask8) __U);
936c0fe4
AI
10488}
10489
6b62f323 10490extern __inline __m128i
936c0fe4 10491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10492_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10493 const int __imm)
936c0fe4 10494{
6b62f323
JJ
10495 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10496 (__v2di) __B,
10497 (__v2di) __C, __imm,
10498 (__mmask8) -1);
936c0fe4
AI
10499}
10500
6b62f323 10501extern __inline __m128i
936c0fe4 10502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10503_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10504 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10505{
6b62f323
JJ
10506 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10507 (__v2di) __B,
10508 (__v2di) __C, __imm,
10509 (__mmask8) __U);
936c0fe4
AI
10510}
10511
6b62f323 10512extern __inline __m128i
936c0fe4 10513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10514_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10515 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10516{
6b62f323
JJ
10517 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10518 (__v2di) __B,
10519 (__v2di) __C,
10520 __imm,
10521 (__mmask8) __U);
936c0fe4
AI
10522}
10523
6b62f323 10524extern __inline __m128i
936c0fe4 10525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10526_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10527 const int __imm)
936c0fe4 10528{
6b62f323
JJ
10529 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10530 (__v4si) __B,
10531 (__v4si) __C, __imm,
10532 (__mmask8) -1);
936c0fe4
AI
10533}
10534
6b62f323 10535extern __inline __m128i
936c0fe4 10536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10537_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10538 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10539{
6b62f323
JJ
10540 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10541 (__v4si) __B,
10542 (__v4si) __C, __imm,
10543 (__mmask8) __U);
936c0fe4
AI
10544}
10545
6b62f323 10546extern __inline __m128i
936c0fe4 10547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10548_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10549 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10550{
6b62f323
JJ
10551 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10552 (__v4si) __B,
10553 (__v4si) __C,
10554 __imm,
10555 (__mmask8) __U);
936c0fe4
AI
10556}
10557
6b62f323 10558extern __inline __m256
936c0fe4 10559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10560_mm256_roundscale_ps (__m256 __A, const int __imm)
936c0fe4 10561{
6b62f323
JJ
10562 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10563 __imm,
10564 (__v8sf)
10565 _mm256_setzero_ps (),
10566 (__mmask8) -1);
936c0fe4
AI
10567}
10568
6b62f323 10569extern __inline __m256
936c0fe4 10570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10571_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10572 const int __imm)
936c0fe4 10573{
6b62f323
JJ
10574 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10575 __imm,
10576 (__v8sf) __W,
10577 (__mmask8) __U);
936c0fe4
AI
10578}
10579
6b62f323 10580extern __inline __m256
936c0fe4 10581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10582_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
936c0fe4 10583{
6b62f323
JJ
10584 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10585 __imm,
10586 (__v8sf)
10587 _mm256_setzero_ps (),
10588 (__mmask8) __U);
936c0fe4
AI
10589}
10590
6b62f323 10591extern __inline __m256d
936c0fe4 10592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10593_mm256_roundscale_pd (__m256d __A, const int __imm)
936c0fe4 10594{
6b62f323
JJ
10595 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10596 __imm,
10597 (__v4df)
10598 _mm256_setzero_pd (),
10599 (__mmask8) -1);
936c0fe4
AI
10600}
10601
6b62f323 10602extern __inline __m256d
936c0fe4 10603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10604_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10605 const int __imm)
936c0fe4 10606{
6b62f323
JJ
10607 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10608 __imm,
10609 (__v4df) __W,
10610 (__mmask8) __U);
936c0fe4
AI
10611}
10612
6b62f323 10613extern __inline __m256d
936c0fe4 10614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10615_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
936c0fe4 10616{
6b62f323
JJ
10617 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10618 __imm,
10619 (__v4df)
10620 _mm256_setzero_pd (),
10621 (__mmask8) __U);
936c0fe4
AI
10622}
10623
6b62f323 10624extern __inline __m128
936c0fe4 10625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10626_mm_roundscale_ps (__m128 __A, const int __imm)
936c0fe4 10627{
6b62f323
JJ
10628 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10629 __imm,
10630 (__v4sf)
10631 _mm_setzero_ps (),
10632 (__mmask8) -1);
936c0fe4
AI
10633}
10634
6b62f323 10635extern __inline __m128
936c0fe4 10636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10637_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10638 const int __imm)
936c0fe4 10639{
6b62f323
JJ
10640 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10641 __imm,
10642 (__v4sf) __W,
10643 (__mmask8) __U);
936c0fe4
AI
10644}
10645
6b62f323 10646extern __inline __m128
936c0fe4 10647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10648_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
936c0fe4 10649{
6b62f323
JJ
10650 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10651 __imm,
10652 (__v4sf)
10653 _mm_setzero_ps (),
10654 (__mmask8) __U);
936c0fe4
AI
10655}
10656
6b62f323 10657extern __inline __m128d
936c0fe4 10658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10659_mm_roundscale_pd (__m128d __A, const int __imm)
936c0fe4 10660{
6b62f323
JJ
10661 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10662 __imm,
10663 (__v2df)
10664 _mm_setzero_pd (),
10665 (__mmask8) -1);
936c0fe4
AI
10666}
10667
6b62f323 10668extern __inline __m128d
936c0fe4 10669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10670_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10671 const int __imm)
936c0fe4 10672{
6b62f323
JJ
10673 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10674 __imm,
10675 (__v2df) __W,
10676 (__mmask8) __U);
936c0fe4
AI
10677}
10678
6b62f323 10679extern __inline __m128d
936c0fe4 10680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10681_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
936c0fe4 10682{
6b62f323
JJ
10683 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10684 __imm,
10685 (__v2df)
10686 _mm_setzero_pd (),
10687 (__mmask8) __U);
936c0fe4
AI
10688}
10689
6b62f323 10690extern __inline __m256
936c0fe4 10691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10692_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10693 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10694{
6b62f323
JJ
10695 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10696 (__C << 2) | __B,
10697 (__v8sf)
10698 _mm256_setzero_ps (),
10699 (__mmask8) -1);
936c0fe4
AI
10700}
10701
6b62f323 10702extern __inline __m256
936c0fe4 10703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10704_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10705 _MM_MANTISSA_NORM_ENUM __B,
10706 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10707{
6b62f323
JJ
10708 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10709 (__C << 2) | __B,
10710 (__v8sf) __W,
10711 (__mmask8) __U);
936c0fe4
AI
10712}
10713
6b62f323 10714extern __inline __m256
936c0fe4 10715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10716_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10717 _MM_MANTISSA_NORM_ENUM __B,
10718 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10719{
6b62f323
JJ
10720 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10721 (__C << 2) | __B,
10722 (__v8sf)
10723 _mm256_setzero_ps (),
10724 (__mmask8) __U);
936c0fe4
AI
10725}
10726
6b62f323 10727extern __inline __m128
936c0fe4 10728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10729_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10730 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10731{
6b62f323
JJ
10732 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10733 (__C << 2) | __B,
10734 (__v4sf)
10735 _mm_setzero_ps (),
10736 (__mmask8) -1);
936c0fe4
AI
10737}
10738
6b62f323 10739extern __inline __m128
936c0fe4 10740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10741_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10742 _MM_MANTISSA_NORM_ENUM __B,
10743 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10744{
6b62f323
JJ
10745 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10746 (__C << 2) | __B,
10747 (__v4sf) __W,
10748 (__mmask8) __U);
936c0fe4
AI
10749}
10750
6b62f323 10751extern __inline __m128
936c0fe4 10752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10753_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10754 _MM_MANTISSA_NORM_ENUM __B,
10755 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10756{
6b62f323
JJ
10757 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10758 (__C << 2) | __B,
10759 (__v4sf)
10760 _mm_setzero_ps (),
10761 (__mmask8) __U);
936c0fe4
AI
10762}
10763
6b62f323 10764extern __inline __m256d
936c0fe4 10765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10766_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10767 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10768{
6b62f323
JJ
10769 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10770 (__C << 2) | __B,
10771 (__v4df)
10772 _mm256_setzero_pd (),
10773 (__mmask8) -1);
936c0fe4
AI
10774}
10775
6b62f323 10776extern __inline __m256d
936c0fe4 10777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10778_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10779 _MM_MANTISSA_NORM_ENUM __B,
10780 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10781{
6b62f323
JJ
10782 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10783 (__C << 2) | __B,
10784 (__v4df) __W,
10785 (__mmask8) __U);
936c0fe4
AI
10786}
10787
6b62f323 10788extern __inline __m256d
936c0fe4 10789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10790_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10791 _MM_MANTISSA_NORM_ENUM __B,
10792 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10793{
6b62f323
JJ
10794 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10795 (__C << 2) | __B,
10796 (__v4df)
10797 _mm256_setzero_pd (),
10798 (__mmask8) __U);
936c0fe4
AI
10799}
10800
6b62f323 10801extern __inline __m128d
936c0fe4 10802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10803_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10804 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10805{
6b62f323
JJ
10806 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10807 (__C << 2) | __B,
10808 (__v2df)
10809 _mm_setzero_pd (),
10810 (__mmask8) -1);
936c0fe4
AI
10811}
10812
6b62f323 10813extern __inline __m128d
936c0fe4 10814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10815_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10816 _MM_MANTISSA_NORM_ENUM __B,
10817 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10818{
6b62f323
JJ
10819 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10820 (__C << 2) | __B,
10821 (__v2df) __W,
10822 (__mmask8) __U);
936c0fe4
AI
10823}
10824
6b62f323 10825extern __inline __m128d
936c0fe4 10826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10827_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10828 _MM_MANTISSA_NORM_ENUM __B,
10829 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10830{
6b62f323
JJ
10831 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10832 (__C << 2) | __B,
10833 (__v2df)
10834 _mm_setzero_pd (),
10835 (__mmask8) __U);
936c0fe4
AI
10836}
10837
6b62f323 10838extern __inline __m256
936c0fe4 10839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10840_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10841 __m256i __index, void const *__addr,
10842 int __scale)
936c0fe4 10843{
6b62f323
JJ
10844 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10845 __addr,
10846 (__v8si) __index,
10847 __mask, __scale);
936c0fe4
AI
10848}
10849
6b62f323 10850extern __inline __m128
936c0fe4 10851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10852_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10853 __m128i __index, void const *__addr,
10854 int __scale)
936c0fe4 10855{
6b62f323
JJ
10856 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10857 __addr,
10858 (__v4si) __index,
10859 __mask, __scale);
936c0fe4
AI
10860}
10861
6b62f323 10862extern __inline __m256d
936c0fe4 10863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10864_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10865 __m128i __index, void const *__addr,
10866 int __scale)
936c0fe4 10867{
6b62f323
JJ
10868 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10869 __addr,
10870 (__v4si) __index,
10871 __mask, __scale);
936c0fe4
AI
10872}
10873
6b62f323 10874extern __inline __m128d
936c0fe4 10875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10876_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10877 __m128i __index, void const *__addr,
10878 int __scale)
936c0fe4 10879{
6b62f323
JJ
10880 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10881 __addr,
10882 (__v4si) __index,
10883 __mask, __scale);
936c0fe4
AI
10884}
10885
6b62f323 10886extern __inline __m128
936c0fe4 10887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10888_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10889 __m256i __index, void const *__addr,
10890 int __scale)
10891{
10892 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10893 __addr,
10894 (__v4di) __index,
10895 __mask, __scale);
936c0fe4
AI
10896}
10897
6b62f323 10898extern __inline __m128
936c0fe4 10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10900_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10901 __m128i __index, void const *__addr,
10902 int __scale)
936c0fe4 10903{
6b62f323
JJ
10904 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10905 __addr,
10906 (__v2di) __index,
10907 __mask, __scale);
936c0fe4
AI
10908}
10909
6b62f323 10910extern __inline __m256d
936c0fe4 10911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10912_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10913 __m256i __index, void const *__addr,
10914 int __scale)
936c0fe4 10915{
6b62f323
JJ
10916 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10917 __addr,
10918 (__v4di) __index,
10919 __mask, __scale);
936c0fe4
AI
10920}
10921
6b62f323 10922extern __inline __m128d
936c0fe4 10923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10924_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10925 __m128i __index, void const *__addr,
10926 int __scale)
936c0fe4 10927{
6b62f323
JJ
10928 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10929 __addr,
10930 (__v2di) __index,
10931 __mask, __scale);
936c0fe4
AI
10932}
10933
6b62f323 10934extern __inline __m256i
936c0fe4 10935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10936_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10937 __m256i __index, void const *__addr,
10938 int __scale)
936c0fe4 10939{
6b62f323
JJ
10940 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10941 __addr,
10942 (__v8si) __index,
10943 __mask, __scale);
936c0fe4
AI
10944}
10945
10946extern __inline __m128i
10947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10948_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10949 __m128i __index, void const *__addr,
10950 int __scale)
936c0fe4 10951{
6b62f323
JJ
10952 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10953 __addr,
10954 (__v4si) __index,
10955 __mask, __scale);
936c0fe4
AI
10956}
10957
10958extern __inline __m256i
10959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10960_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10961 __m128i __index, void const *__addr,
10962 int __scale)
936c0fe4 10963{
6b62f323
JJ
10964 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10965 __addr,
10966 (__v4si) __index,
10967 __mask, __scale);
936c0fe4
AI
10968}
10969
6b62f323 10970extern __inline __m128i
936c0fe4 10971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10972_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10973 __m128i __index, void const *__addr,
10974 int __scale)
936c0fe4 10975{
6b62f323
JJ
10976 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10977 __addr,
10978 (__v4si) __index,
10979 __mask, __scale);
936c0fe4
AI
10980}
10981
6b62f323 10982extern __inline __m128i
936c0fe4 10983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10984_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10985 __m256i __index, void const *__addr,
10986 int __scale)
936c0fe4 10987{
6b62f323
JJ
10988 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10989 __addr,
10990 (__v4di) __index,
10991 __mask, __scale);
936c0fe4
AI
10992}
10993
10994extern __inline __m128i
10995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10996_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10997 __m128i __index, void const *__addr,
10998 int __scale)
936c0fe4 10999{
6b62f323
JJ
11000 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11001 __addr,
11002 (__v2di) __index,
11003 __mask, __scale);
936c0fe4
AI
11004}
11005
6b62f323 11006extern __inline __m256i
936c0fe4 11007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11008_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11009 __m256i __index, void const *__addr,
11010 int __scale)
936c0fe4 11011{
6b62f323
JJ
11012 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11013 __addr,
11014 (__v4di) __index,
11015 __mask, __scale);
936c0fe4
AI
11016}
11017
11018extern __inline __m128i
11019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11020_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11021 __m128i __index, void const *__addr,
11022 int __scale)
936c0fe4 11023{
6b62f323
JJ
11024 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11025 __addr,
11026 (__v2di) __index,
11027 __mask, __scale);
936c0fe4
AI
11028}
11029
/* Unmasked form: calls __builtin_ia32_scattersiv8sf with the constant mask
   (__mmask8) 0xFF, __index viewed as __v8si and __v1 viewed as __v8sf;
   __addr and __scale are passed through unchanged.  */
6b62f323 11030extern __inline void
936c0fe4 11031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11032_mm256_i32scatter_ps (void *__addr, __m256i __index,
11033 __m256 __v1, const int __scale)
936c0fe4 11034{
6b62f323
JJ
11035 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11036 (__v8si) __index, (__v8sf) __v1,
11037 __scale);
936c0fe4
AI
11038}
11039
/* Masked form: calls __builtin_ia32_scattersiv8sf with the caller's __mask,
   __index viewed as __v8si and __v1 viewed as __v8sf; __addr and __scale
   are passed through unchanged.  */
6b62f323 11040extern __inline void
936c0fe4 11041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11042_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11043 __m256i __index, __m256 __v1,
11044 const int __scale)
936c0fe4 11045{
6b62f323
JJ
11046 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11047 (__v8sf) __v1, __scale);
936c0fe4
AI
11048}
11049
/* Unmasked form: calls __builtin_ia32_scattersiv4sf with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4si and __v1 viewed as __v4sf;
   __addr and __scale are passed through unchanged.  */
6b62f323 11050extern __inline void
936c0fe4 11051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11052_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11053 const int __scale)
936c0fe4 11054{
6b62f323
JJ
11055 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11056 (__v4si) __index, (__v4sf) __v1,
11057 __scale);
936c0fe4
AI
11058}
11059
/* Masked form: calls __builtin_ia32_scattersiv4sf with the caller's __mask,
   __index viewed as __v4si and __v1 viewed as __v4sf; __addr and __scale
   are passed through unchanged.  */
6b62f323 11060extern __inline void
936c0fe4 11061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11062_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11063 __m128i __index, __m128 __v1,
11064 const int __scale)
936c0fe4 11065{
6b62f323
JJ
11066 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11067 (__v4sf) __v1, __scale);
936c0fe4
AI
11068}
11069
/* Unmasked form: calls __builtin_ia32_scattersiv4df with the constant mask
   (__mmask8) 0xFF, the 128-bit __index viewed as __v4si and __v1 viewed as
   __v4df; __addr and __scale are passed through unchanged.  */
6b62f323 11070extern __inline void
936c0fe4 11071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11072_mm256_i32scatter_pd (void *__addr, __m128i __index,
11073 __m256d __v1, const int __scale)
936c0fe4 11074{
6b62f323
JJ
11075 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11076 (__v4si) __index, (__v4df) __v1,
11077 __scale);
936c0fe4
AI
11078}
11079
/* Masked form: calls __builtin_ia32_scattersiv4df with the caller's __mask,
   the 128-bit __index viewed as __v4si and __v1 viewed as __v4df; __addr
   and __scale are passed through unchanged.  */
6b62f323 11080extern __inline void
936c0fe4 11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11082_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11083 __m128i __index, __m256d __v1,
11084 const int __scale)
936c0fe4 11085{
6b62f323
JJ
11086 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11087 (__v4df) __v1, __scale);
936c0fe4
AI
11088}
11089
/* Unmasked form: calls __builtin_ia32_scattersiv2df with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4si and __v1 viewed as __v2df;
   __addr and __scale are passed through unchanged.  */
6b62f323 11090extern __inline void
936c0fe4 11091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11092_mm_i32scatter_pd (void *__addr, __m128i __index,
11093 __m128d __v1, const int __scale)
936c0fe4 11094{
6b62f323
JJ
11095 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11096 (__v4si) __index, (__v2df) __v1,
11097 __scale);
936c0fe4
AI
11098}
11099
/* Masked form: calls __builtin_ia32_scattersiv2df with the caller's __mask,
   __index viewed as __v4si and __v1 viewed as __v2df; __addr and __scale
   are passed through unchanged.  */
6b62f323 11100extern __inline void
936c0fe4 11101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11102_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11103 __m128i __index, __m128d __v1,
11104 const int __scale)
936c0fe4 11105{
6b62f323
JJ
11106 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11107 (__v2df) __v1, __scale);
936c0fe4
AI
11108}
11109
/* Unmasked form: calls __builtin_ia32_scatterdiv8sf with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4di and the 128-bit __v1 viewed as
   __v4sf; __addr and __scale are passed through unchanged.  */
6b62f323 11110extern __inline void
936c0fe4 11111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11112_mm256_i64scatter_ps (void *__addr, __m256i __index,
11113 __m128 __v1, const int __scale)
936c0fe4 11114{
6b62f323
JJ
11115 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11116 (__v4di) __index, (__v4sf) __v1,
11117 __scale);
936c0fe4
AI
11118}
11119
/* Masked form: calls __builtin_ia32_scatterdiv8sf with the caller's __mask,
   __index viewed as __v4di and the 128-bit __v1 viewed as __v4sf; __addr
   and __scale are passed through unchanged.  */
6b62f323 11120extern __inline void
936c0fe4 11121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11122_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11123 __m256i __index, __m128 __v1,
11124 const int __scale)
936c0fe4 11125{
6b62f323
JJ
11126 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11127 (__v4sf) __v1, __scale);
936c0fe4
AI
11128}
11129
/* Unmasked form: calls __builtin_ia32_scatterdiv4sf with the constant mask
   (__mmask8) 0xFF, __index viewed as __v2di and __v1 viewed as __v4sf;
   __addr and __scale are passed through unchanged.  */
6b62f323 11130extern __inline void
936c0fe4 11131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11132_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11133 const int __scale)
936c0fe4 11134{
6b62f323
JJ
11135 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11136 (__v2di) __index, (__v4sf) __v1,
11137 __scale);
936c0fe4
AI
11138}
11139
/* Masked form: calls __builtin_ia32_scatterdiv4sf with the caller's __mask,
   __index viewed as __v2di and __v1 viewed as __v4sf; __addr and __scale
   are passed through unchanged.  */
6b62f323 11140extern __inline void
936c0fe4 11141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11142_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11143 __m128i __index, __m128 __v1,
11144 const int __scale)
936c0fe4 11145{
6b62f323
JJ
11146 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11147 (__v4sf) __v1, __scale);
936c0fe4
AI
11148}
11149
/* Unmasked form: calls __builtin_ia32_scatterdiv4df with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4di and __v1 viewed as __v4df;
   __addr and __scale are passed through unchanged.  */
6b62f323 11150extern __inline void
936c0fe4 11151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11152_mm256_i64scatter_pd (void *__addr, __m256i __index,
11153 __m256d __v1, const int __scale)
936c0fe4 11154{
6b62f323
JJ
11155 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11156 (__v4di) __index, (__v4df) __v1,
11157 __scale);
936c0fe4
AI
11158}
11159
/* Masked form: calls __builtin_ia32_scatterdiv4df with the caller's __mask,
   __index viewed as __v4di and __v1 viewed as __v4df; __addr and __scale
   are passed through unchanged.  */
6b62f323 11160extern __inline void
936c0fe4 11161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11162_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11163 __m256i __index, __m256d __v1,
11164 const int __scale)
936c0fe4 11165{
6b62f323
JJ
11166 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11167 (__v4df) __v1, __scale);
936c0fe4
AI
11168}
11169
/* Unmasked form: calls __builtin_ia32_scatterdiv2df with the constant mask
   (__mmask8) 0xFF, __index viewed as __v2di and __v1 viewed as __v2df;
   __addr and __scale are passed through unchanged.  */
6b62f323 11170extern __inline void
936c0fe4 11171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11172_mm_i64scatter_pd (void *__addr, __m128i __index,
11173 __m128d __v1, const int __scale)
936c0fe4 11174{
6b62f323
JJ
11175 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11176 (__v2di) __index, (__v2df) __v1,
11177 __scale);
936c0fe4
AI
11178}
11179
/* Masked form: calls __builtin_ia32_scatterdiv2df with the caller's __mask,
   __index viewed as __v2di and __v1 viewed as __v2df; __addr and __scale
   are passed through unchanged.  */
6b62f323 11180extern __inline void
936c0fe4 11181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11182_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11183 __m128i __index, __m128d __v1,
11184 const int __scale)
936c0fe4 11185{
6b62f323
JJ
11186 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11187 (__v2df) __v1, __scale);
936c0fe4
AI
11188}
11189
/* Unmasked form: calls __builtin_ia32_scattersiv8si with the constant mask
   (__mmask8) 0xFF, with both __index and __v1 viewed as __v8si; __addr and
   __scale are passed through unchanged.  */
6b62f323 11190extern __inline void
936c0fe4 11191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11192_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11193 __m256i __v1, const int __scale)
936c0fe4 11194{
6b62f323
JJ
11195 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11196 (__v8si) __index, (__v8si) __v1,
11197 __scale);
936c0fe4
AI
11198}
11199
/* Masked form: calls __builtin_ia32_scattersiv8si with the caller's __mask,
   with both __index and __v1 viewed as __v8si; __addr and __scale are
   passed through unchanged.  */
6b62f323 11200extern __inline void
936c0fe4 11201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11202_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11203 __m256i __index, __m256i __v1,
11204 const int __scale)
936c0fe4 11205{
6b62f323
JJ
11206 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11207 (__v8si) __v1, __scale);
936c0fe4
AI
11208}
11209
/* Unmasked form: calls __builtin_ia32_scattersiv4si with the constant mask
   (__mmask8) 0xFF, with both __index and __v1 viewed as __v4si; __addr and
   __scale are passed through unchanged.  */
6b62f323 11210extern __inline void
936c0fe4 11211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11212_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11213 __m128i __v1, const int __scale)
936c0fe4 11214{
6b62f323
JJ
11215 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11216 (__v4si) __index, (__v4si) __v1,
11217 __scale);
936c0fe4
AI
11218}
11219
/* Masked form: calls __builtin_ia32_scattersiv4si with the caller's __mask,
   with both __index and __v1 viewed as __v4si; __addr and __scale are
   passed through unchanged.  */
6b62f323 11220extern __inline void
936c0fe4 11221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11222_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11223 __m128i __index, __m128i __v1,
11224 const int __scale)
936c0fe4 11225{
6b62f323
JJ
11226 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11227 (__v4si) __v1, __scale);
936c0fe4
AI
11228}
11229
/* Unmasked form: calls __builtin_ia32_scattersiv4di with the constant mask
   (__mmask8) 0xFF, the 128-bit __index viewed as __v4si and __v1 viewed as
   __v4di; __addr and __scale are passed through unchanged.  */
6b62f323 11230extern __inline void
936c0fe4 11231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11232_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11233 __m256i __v1, const int __scale)
936c0fe4 11234{
6b62f323
JJ
11235 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11236 (__v4si) __index, (__v4di) __v1,
11237 __scale);
936c0fe4
AI
11238}
11239
/* Masked form: calls __builtin_ia32_scattersiv4di with the caller's __mask,
   the 128-bit __index viewed as __v4si and __v1 viewed as __v4di; __addr
   and __scale are passed through unchanged.  */
6b62f323 11240extern __inline void
936c0fe4 11241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11242_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11243 __m128i __index, __m256i __v1,
11244 const int __scale)
936c0fe4 11245{
6b62f323
JJ
11246 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11247 (__v4di) __v1, __scale);
936c0fe4
AI
11248}
11249
/* Unmasked form: calls __builtin_ia32_scattersiv2di with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4si and __v1 viewed as __v2di;
   __addr and __scale are passed through unchanged.  */
6b62f323 11250extern __inline void
936c0fe4 11251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11252_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11253 __m128i __v1, const int __scale)
936c0fe4 11254{
6b62f323
JJ
11255 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11256 (__v4si) __index, (__v2di) __v1,
11257 __scale);
936c0fe4
AI
11258}
11259
/* Masked form: calls __builtin_ia32_scattersiv2di with the caller's __mask,
   __index viewed as __v4si and __v1 viewed as __v2di; __addr and __scale
   are passed through unchanged.  */
6b62f323 11260extern __inline void
936c0fe4 11261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11262_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11263 __m128i __index, __m128i __v1,
11264 const int __scale)
936c0fe4 11265{
6b62f323
JJ
11266 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11267 (__v2di) __v1, __scale);
936c0fe4
AI
11268}
11269
/* Unmasked form: calls __builtin_ia32_scatterdiv8si with the constant mask
   (__mmask8) 0xFF, __index viewed as __v4di and the 128-bit __v1 viewed as
   __v4si; __addr and __scale are passed through unchanged.  */
6b62f323 11270extern __inline void
936c0fe4 11271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11272_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11273 __m128i __v1, const int __scale)
936c0fe4 11274{
6b62f323
JJ
11275 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11276 (__v4di) __index, (__v4si) __v1,
11277 __scale);
936c0fe4
AI
11278}
11279
/* Masked form: calls __builtin_ia32_scatterdiv8si with the caller's __mask,
   __index viewed as __v4di and the 128-bit __v1 viewed as __v4si; __addr
   and __scale are passed through unchanged.  */
6b62f323 11280extern __inline void
936c0fe4 11281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11282_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11283 __m256i __index, __m128i __v1,
11284 const int __scale)
936c0fe4 11285{
6b62f323
JJ
11286 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11287 (__v4si) __v1, __scale);
936c0fe4
AI
11288}
11289
/* Unmasked form: calls __builtin_ia32_scatterdiv4si with the constant mask
   (__mmask8) 0xFF, __index viewed as __v2di and __v1 viewed as __v4si;
   __addr and __scale are passed through unchanged.  */
6b62f323 11290extern __inline void
936c0fe4 11291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11292_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11293 __m128i __v1, const int __scale)
936c0fe4 11294{
6b62f323
JJ
11295 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11296 (__v2di) __index, (__v4si) __v1,
11297 __scale);
936c0fe4
AI
11298}
11299
/* Masked form: calls __builtin_ia32_scatterdiv4si with the caller's __mask,
   __index viewed as __v2di and __v1 viewed as __v4si; __addr and __scale
   are passed through unchanged.  */
6b62f323 11300extern __inline void
936c0fe4 11301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11302_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11303 __m128i __index, __m128i __v1,
11304 const int __scale)
936c0fe4 11305{
6b62f323
JJ
11306 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11307 (__v4si) __v1, __scale);
936c0fe4
AI
11308}
11309
/* Unmasked form: calls __builtin_ia32_scatterdiv4di with the constant mask
   (__mmask8) 0xFF, with both __index and __v1 viewed as __v4di; __addr and
   __scale are passed through unchanged.  */
6b62f323 11310extern __inline void
936c0fe4 11311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11312_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11313 __m256i __v1, const int __scale)
936c0fe4 11314{
6b62f323
JJ
11315 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11316 (__v4di) __index, (__v4di) __v1,
11317 __scale);
936c0fe4
AI
11318}
11319
/* Masked form: calls __builtin_ia32_scatterdiv4di with the caller's __mask,
   with both __index and __v1 viewed as __v4di; __addr and __scale are
   passed through unchanged.  */
6b62f323 11320extern __inline void
936c0fe4 11321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11322_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11323 __m256i __index, __m256i __v1,
11324 const int __scale)
936c0fe4 11325{
6b62f323
JJ
11326 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11327 (__v4di) __v1, __scale);
936c0fe4
AI
11328}
11329
/* Unmasked form: calls __builtin_ia32_scatterdiv2di with the constant mask
   (__mmask8) 0xFF, with both __index and __v1 viewed as __v2di; __addr and
   __scale are passed through unchanged.  */
6b62f323 11330extern __inline void
936c0fe4 11331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11332_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11333 __m128i __v1, const int __scale)
936c0fe4 11334{
6b62f323
JJ
11335 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11336 (__v2di) __index, (__v2di) __v1,
11337 __scale);
936c0fe4
AI
11338}
11339
/* Masked form: calls __builtin_ia32_scatterdiv2di with the caller's __mask,
   with both __index and __v1 viewed as __v2di; __addr and __scale are
   passed through unchanged.  */
6b62f323 11340extern __inline void
936c0fe4 11341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11342_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11343 __m128i __index, __m128i __v1,
11344 const int __scale)
936c0fe4 11345{
6b62f323
JJ
11346 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11347 (__v2di) __v1, __scale);
936c0fe4
AI
11348}
11349
/* Calls __builtin_ia32_pshufd256_mask with __A viewed as __v8si and the
   selector __mask, passing __W (viewed as __v8si) and __U as the builtin's
   trailing vector and mask arguments.  Note that __mask here is the
   _MM_PERM_ENUM selector, not the write mask; the write mask is __U.  */
11350extern __inline __m256i
11351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11352_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11353 _MM_PERM_ENUM __mask)
936c0fe4 11354{
6b62f323
JJ
11355 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11356 (__v8si) __W,
936c0fe4
AI
11357 (__mmask8) __U);
11358}
11359
/* Calls __builtin_ia32_pshufd256_mask with __A viewed as __v8si and the
   selector __mask, passing an all-zero vector from _mm256_setzero_si256 ()
   and the caller's __U as the trailing vector and mask arguments.  */
11360extern __inline __m256i
11361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11362_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11363 _MM_PERM_ENUM __mask)
936c0fe4 11364{
6b62f323
JJ
11365 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11366 (__v8si)
936c0fe4
AI
11367 _mm256_setzero_si256 (),
11368 (__mmask8) __U);
11369}
11370
/* Calls __builtin_ia32_pshufd128_mask with __A viewed as __v4si and the
   selector __mask, passing __W (viewed as __v4si) and __U as the builtin's
   trailing vector and mask arguments.  Note that __mask here is the
   _MM_PERM_ENUM selector, not the write mask; the write mask is __U.  */
6b62f323 11371extern __inline __m128i
936c0fe4 11372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11373_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11374 _MM_PERM_ENUM __mask)
936c0fe4 11375{
6b62f323
JJ
11376 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11377 (__v4si) __W,
936c0fe4
AI
11378 (__mmask8) __U);
11379}
11380
/* Calls __builtin_ia32_pshufd128_mask with __A viewed as __v4si and the
   selector __mask, passing an all-zero vector from _mm_setzero_si128 ()
   and the caller's __U as the trailing vector and mask arguments.  */
6b62f323 11381extern __inline __m128i
936c0fe4 11382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11383_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11384 _MM_PERM_ENUM __mask)
936c0fe4 11385{
6b62f323
JJ
11386 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11387 (__v4si)
11388 _mm_setzero_si128 (),
936c0fe4
AI
11389 (__mmask8) __U);
11390}
11391
/* Unmasked form: calls __builtin_ia32_prold256_mask with __A viewed as
   __v8si and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11392extern __inline __m256i
936c0fe4 11393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11394_mm256_rol_epi32 (__m256i __A, const int __B)
936c0fe4 11395{
6b62f323
JJ
11396 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11397 (__v8si)
11398 _mm256_setzero_si256 (),
11399 (__mmask8) -1);
936c0fe4
AI
11400}
11401
/* Calls __builtin_ia32_prold256_mask with __A viewed as __v8si and __B,
   passing __W (viewed as __v8si) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11402extern __inline __m256i
936c0fe4 11403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11404_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11405 const int __B)
936c0fe4 11406{
6b62f323
JJ
11407 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11408 (__v8si) __W,
11409 (__mmask8) __U);
936c0fe4
AI
11410}
11411
/* Calls __builtin_ia32_prold256_mask with __A viewed as __v8si and __B,
   passing an all-zero vector from _mm256_setzero_si256 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11412extern __inline __m256i
936c0fe4 11413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11414_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11415{
6b62f323
JJ
11416 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11417 (__v8si)
11418 _mm256_setzero_si256 (),
11419 (__mmask8) __U);
936c0fe4
AI
11420}
11421
/* Unmasked form: calls __builtin_ia32_prold128_mask with __A viewed as
   __v4si and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11422extern __inline __m128i
936c0fe4 11423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11424_mm_rol_epi32 (__m128i __A, const int __B)
936c0fe4 11425{
6b62f323
JJ
11426 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11427 (__v4si)
11428 _mm_setzero_si128 (),
11429 (__mmask8) -1);
936c0fe4
AI
11430}
11431
/* Calls __builtin_ia32_prold128_mask with __A viewed as __v4si and __B,
   passing __W (viewed as __v4si) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11432extern __inline __m128i
936c0fe4 11433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11434_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11435 const int __B)
936c0fe4 11436{
6b62f323
JJ
11437 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11438 (__v4si) __W,
936c0fe4
AI
11439 (__mmask8) __U);
11440}
11441
/* Calls __builtin_ia32_prold128_mask with __A viewed as __v4si and __B,
   passing an all-zero vector from _mm_setzero_si128 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11442extern __inline __m128i
936c0fe4 11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11444_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11445{
6b62f323
JJ
11446 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11447 (__v4si)
11448 _mm_setzero_si128 (),
936c0fe4
AI
11449 (__mmask8) __U);
11450}
11451
/* Unmasked form: calls __builtin_ia32_prord256_mask with __A viewed as
   __v8si and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11452extern __inline __m256i
936c0fe4 11453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11454_mm256_ror_epi32 (__m256i __A, const int __B)
936c0fe4 11455{
6b62f323
JJ
11456 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11457 (__v8si)
11458 _mm256_setzero_si256 (),
11459 (__mmask8) -1);
936c0fe4
AI
11460}
11461
/* Calls __builtin_ia32_prord256_mask with __A viewed as __v8si and __B,
   passing __W (viewed as __v8si) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11462extern __inline __m256i
936c0fe4 11463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11464_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11465 const int __B)
936c0fe4 11466{
6b62f323
JJ
11467 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11468 (__v8si) __W,
11469 (__mmask8) __U);
936c0fe4
AI
11470}
11471
/* Calls __builtin_ia32_prord256_mask with __A viewed as __v8si and __B,
   passing an all-zero vector from _mm256_setzero_si256 () and the caller's
   __U as the trailing vector and mask arguments.  */
11472extern __inline __m256i
11473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11474_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11475{
6b62f323
JJ
11476 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11477 (__v8si)
11478 _mm256_setzero_si256 (),
11479 (__mmask8) __U);
936c0fe4
AI
11480}
11481
/* Unmasked form: calls __builtin_ia32_prord128_mask with __A viewed as
   __v4si and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11482extern __inline __m128i
936c0fe4 11483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11484_mm_ror_epi32 (__m128i __A, const int __B)
936c0fe4 11485{
6b62f323
JJ
11486 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11487 (__v4si)
11488 _mm_setzero_si128 (),
11489 (__mmask8) -1);
936c0fe4
AI
11490}
11491
/* Calls __builtin_ia32_prord128_mask with __A viewed as __v4si and __B,
   passing __W (viewed as __v4si) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11492extern __inline __m128i
936c0fe4 11493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11494_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11495 const int __B)
936c0fe4 11496{
6b62f323
JJ
11497 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11498 (__v4si) __W,
11499 (__mmask8) __U);
936c0fe4
AI
11500}
11501
/* Calls __builtin_ia32_prord128_mask with __A viewed as __v4si and __B,
   passing an all-zero vector from _mm_setzero_si128 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11502extern __inline __m128i
936c0fe4 11503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11504_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11505{
6b62f323
JJ
11506 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11507 (__v4si)
11508 _mm_setzero_si128 (),
11509 (__mmask8) __U);
936c0fe4
AI
11510}
11511
/* Unmasked form: calls __builtin_ia32_prolq256_mask with __A viewed as
   __v4di and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11512extern __inline __m256i
936c0fe4 11513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11514_mm256_rol_epi64 (__m256i __A, const int __B)
936c0fe4 11515{
6b62f323
JJ
11516 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11517 (__v4di)
11518 _mm256_setzero_si256 (),
11519 (__mmask8) -1);
936c0fe4
AI
11520}
11521
/* Calls __builtin_ia32_prolq256_mask with __A viewed as __v4di and __B,
   passing __W (viewed as __v4di) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11522extern __inline __m256i
936c0fe4 11523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11524_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11525 const int __B)
936c0fe4 11526{
6b62f323
JJ
11527 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11528 (__v4di) __W,
11529 (__mmask8) __U);
936c0fe4
AI
11530}
11531
/* Calls __builtin_ia32_prolq256_mask with __A viewed as __v4di and __B,
   passing an all-zero vector from _mm256_setzero_si256 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11532extern __inline __m256i
936c0fe4 11533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11534_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11535{
11536 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11537 (__v4di)
11538 _mm256_setzero_si256 (),
11539 (__mmask8) __U);
936c0fe4
AI
11540}
11541
/* Unmasked form: calls __builtin_ia32_prolq128_mask with __A viewed as
   __v2di and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11542extern __inline __m128i
936c0fe4 11543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11544_mm_rol_epi64 (__m128i __A, const int __B)
936c0fe4 11545{
6b62f323
JJ
11546 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11547 (__v2di)
11548 _mm_setzero_si128 (),
936c0fe4
AI
11549 (__mmask8) -1);
11550}
11551
/* Calls __builtin_ia32_prolq128_mask with __A viewed as __v2di and __B,
   passing __W (viewed as __v2di) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11552extern __inline __m128i
936c0fe4 11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11554_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11555 const int __B)
936c0fe4 11556{
6b62f323
JJ
11557 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11558 (__v2di) __W,
11559 (__mmask8) __U);
936c0fe4
AI
11560}
11561
/* Calls __builtin_ia32_prolq128_mask with __A viewed as __v2di and __B,
   passing an all-zero vector from _mm_setzero_si128 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11562extern __inline __m128i
936c0fe4 11563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11564_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11565{
6b62f323
JJ
11566 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11567 (__v2di)
11568 _mm_setzero_si128 (),
11569 (__mmask8) __U);
936c0fe4
AI
11570}
11571
/* Unmasked form: calls __builtin_ia32_prorq256_mask with __A viewed as
   __v4di and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11572extern __inline __m256i
936c0fe4 11573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11574_mm256_ror_epi64 (__m256i __A, const int __B)
936c0fe4 11575{
6b62f323
JJ
11576 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11577 (__v4di)
11578 _mm256_setzero_si256 (),
11579 (__mmask8) -1);
936c0fe4
AI
11580}
11581
/* Calls __builtin_ia32_prorq256_mask with __A viewed as __v4di and __B,
   passing __W (viewed as __v4di) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11582extern __inline __m256i
936c0fe4 11583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11584_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11585 const int __B)
936c0fe4 11586{
6b62f323
JJ
11587 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11588 (__v4di) __W,
11589 (__mmask8) __U);
936c0fe4
AI
11590}
11591
/* Calls __builtin_ia32_prorq256_mask with __A viewed as __v4di and __B,
   passing an all-zero vector from _mm256_setzero_si256 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11592extern __inline __m256i
936c0fe4 11593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11594_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11595{
6b62f323
JJ
11596 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11597 (__v4di)
11598 _mm256_setzero_si256 (),
936c0fe4
AI
11599 (__mmask8) __U);
11600}
11601
/* Unmasked form: calls __builtin_ia32_prorq128_mask with __A viewed as
   __v2di and __B, passing an all-zero vector and (__mmask8) -1 (all mask
   bits set) as the trailing vector and mask arguments.  */
6b62f323 11602extern __inline __m128i
936c0fe4 11603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11604_mm_ror_epi64 (__m128i __A, const int __B)
936c0fe4 11605{
6b62f323
JJ
11606 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11607 (__v2di)
11608 _mm_setzero_si128 (),
11609 (__mmask8) -1);
936c0fe4
AI
11610}
11611
/* Calls __builtin_ia32_prorq128_mask with __A viewed as __v2di and __B,
   passing __W (viewed as __v2di) and __U as the builtin's trailing vector
   and mask arguments.  */
6b62f323 11612extern __inline __m128i
936c0fe4 11613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11614_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11615 const int __B)
936c0fe4 11616{
6b62f323
JJ
11617 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11618 (__v2di) __W,
11619 (__mmask8) __U);
936c0fe4
AI
11620}
11621
/* Calls __builtin_ia32_prorq128_mask with __A viewed as __v2di and __B,
   passing an all-zero vector from _mm_setzero_si128 () and the caller's
   __U as the trailing vector and mask arguments.  */
6b62f323 11622extern __inline __m128i
936c0fe4 11623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11624_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11625{
6b62f323
JJ
11626 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11627 (__v2di)
11628 _mm_setzero_si128 (),
11629 (__mmask8) __U);
936c0fe4
AI
11630}
11631
/* Unmasked form: calls __builtin_ia32_alignd128_mask with __A and __B
   viewed as __v4si and the immediate __imm, passing an all-zero vector and
   (__mmask8) -1 (all mask bits set) as the trailing vector and mask
   arguments.  */
6b62f323 11632extern __inline __m128i
936c0fe4 11633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11634_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11635{
6b62f323
JJ
11636 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11637 (__v4si) __B, __imm,
11638 (__v4si)
11639 _mm_setzero_si128 (),
11640 (__mmask8) -1);
936c0fe4
AI
11641}
11642
/* Calls __builtin_ia32_alignd128_mask with __A and __B viewed as __v4si
   and the immediate __imm, passing __W (viewed as __v4si) and __U as the
   builtin's trailing vector and mask arguments.  */
6b62f323 11643extern __inline __m128i
936c0fe4 11644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11645_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11646 __m128i __B, const int __imm)
936c0fe4 11647{
6b62f323
JJ
11648 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11649 (__v4si) __B, __imm,
11650 (__v4si) __W,
936c0fe4
AI
11651 (__mmask8) __U);
11652}
11653
/* Calls __builtin_ia32_alignd128_mask with __A and __B viewed as __v4si
   and the immediate __imm, passing an all-zero vector from
   _mm_setzero_si128 () and the caller's __U as the trailing vector and
   mask arguments.  */
6b62f323 11654extern __inline __m128i
936c0fe4 11655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11656_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11657 const int __imm)
936c0fe4 11658{
6b62f323
JJ
11659 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11660 (__v4si) __B, __imm,
11661 (__v4si)
11662 _mm_setzero_si128 (),
11663 (__mmask8) __U);
936c0fe4
AI
11664}
11665
/* Unmasked form: calls __builtin_ia32_alignq128_mask with __A and __B
   viewed as __v2di and the immediate __imm, passing an all-zero vector and
   (__mmask8) -1 (all mask bits set) as the trailing vector and mask
   arguments.  */
6b62f323 11666extern __inline __m128i
936c0fe4 11667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11668_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11669{
6b62f323
JJ
11670 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11671 (__v2di) __B, __imm,
11672 (__v2di)
11673 _mm_setzero_si128 (),
11674 (__mmask8) -1);
936c0fe4
AI
11675}
11676
/* Calls __builtin_ia32_alignq128_mask with __A and __B viewed as __v2di
   and the immediate __imm, passing __W (viewed as __v2di) and __U as the
   builtin's trailing vector and mask arguments.  */
6b62f323 11677extern __inline __m128i
936c0fe4 11678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11679_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11680 __m128i __B, const int __imm)
936c0fe4 11681{
6b62f323
JJ
11682 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11683 (__v2di) __B, __imm,
11684 (__v2di) __W,
11685 (__mmask8) __U);
936c0fe4
AI
11686}
11687
/* Calls __builtin_ia32_alignq128_mask with __A and __B viewed as __v2di
   and the immediate __imm, passing an all-zero vector from
   _mm_setzero_si128 () and the caller's __U as the trailing vector and
   mask arguments.  */
6b62f323 11688extern __inline __m128i
936c0fe4 11689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11690_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11691 const int __imm)
936c0fe4 11692{
6b62f323
JJ
11693 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11694 (__v2di) __B, __imm,
11695 (__v2di)
11696 _mm_setzero_si128 (),
11697 (__mmask8) __U);
936c0fe4
AI
11698}
11699
/* Unmasked form: calls __builtin_ia32_alignd256_mask with __A and __B
   viewed as __v8si and the immediate __imm, passing an all-zero vector and
   (__mmask8) -1 (all mask bits set) as the trailing vector and mask
   arguments.  */
6b62f323 11700extern __inline __m256i
936c0fe4 11701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11702_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11703{
6b62f323
JJ
11704 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11705 (__v8si) __B, __imm,
11706 (__v8si)
11707 _mm256_setzero_si256 (),
936c0fe4
AI
11708 (__mmask8) -1);
11709}
11710
/* Calls __builtin_ia32_alignd256_mask with __A and __B viewed as __v8si
   and the immediate __imm, passing __W (viewed as __v8si) and __U as the
   builtin's trailing vector and mask arguments.  */
6b62f323 11711extern __inline __m256i
936c0fe4 11712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11713_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11714 __m256i __B, const int __imm)
936c0fe4 11715{
6b62f323
JJ
11716 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11717 (__v8si) __B, __imm,
11718 (__v8si) __W,
11719 (__mmask8) __U);
936c0fe4
AI
11720}
11721
/* Calls __builtin_ia32_alignd256_mask with __A and __B viewed as __v8si
   and the immediate __imm, passing an all-zero vector from
   _mm256_setzero_si256 () and the caller's __U as the trailing vector and
   mask arguments.  */
6b62f323 11722extern __inline __m256i
936c0fe4 11723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11724_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11725 const int __imm)
936c0fe4 11726{
6b62f323
JJ
11727 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11728 (__v8si) __B, __imm,
11729 (__v8si)
11730 _mm256_setzero_si256 (),
11731 (__mmask8) __U);
936c0fe4
AI
11732}
11733
/* Unmasked form: calls __builtin_ia32_alignq256_mask with __A and __B
   viewed as __v4di and the immediate __imm, passing an all-zero vector and
   (__mmask8) -1 (all mask bits set) as the trailing vector and mask
   arguments.  */
6b62f323 11734extern __inline __m256i
936c0fe4 11735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11736_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11737{
6b62f323
JJ
11738 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11739 (__v4di) __B, __imm,
11740 (__v4di)
11741 _mm256_setzero_si256 (),
11742 (__mmask8) -1);
936c0fe4
AI
11743}
11744
/* Calls __builtin_ia32_alignq256_mask with __A and __B viewed as __v4di
   and the immediate __imm, passing __W (viewed as __v4di) and __U as the
   builtin's trailing vector and mask arguments.  */
6b62f323 11745extern __inline __m256i
936c0fe4 11746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11747_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11748 __m256i __B, const int __imm)
936c0fe4 11749{
6b62f323
JJ
11750 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11751 (__v4di) __B, __imm,
11752 (__v4di) __W,
936c0fe4
AI
11753 (__mmask8) __U);
11754}
11755
/* Calls __builtin_ia32_alignq256_mask with __A and __B viewed as __v4di
   and the immediate __imm, passing an all-zero vector from
   _mm256_setzero_si256 () and the caller's __U as the trailing vector and
   mask arguments.  */
6b62f323 11756extern __inline __m256i
936c0fe4 11757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11758_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11759 const int __imm)
936c0fe4 11760{
6b62f323
JJ
11761 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11762 (__v4di) __B, __imm,
11763 (__v4di)
11764 _mm256_setzero_si256 (),
936c0fe4
AI
11765 (__mmask8) __U);
11766}
11767
/* Calls __builtin_ia32_vcvtps2ph_mask with __A viewed as __v4sf and the
   immediate __I, passing __W (viewed as __v8hi) and __U as the builtin's
   trailing vector and mask arguments.  The result is returned as
   __m128i.  */
6b62f323 11768extern __inline __m128i
936c0fe4 11769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11770_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11771 const int __I)
936c0fe4 11772{
6b62f323
JJ
11773 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11774 (__v8hi) __W,
936c0fe4
AI
11775 (__mmask8) __U);
11776}
11777
/* Calls __builtin_ia32_vcvtps2ph_mask with __A viewed as __v4sf and the
   immediate __I, passing an all-zero vector from _mm_setzero_si128 ()
   (viewed as __v8hi) and the caller's __U as the trailing vector and mask
   arguments.  */
6b62f323 11778extern __inline __m128i
936c0fe4 11779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11780_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
936c0fe4 11781{
6b62f323
JJ
11782 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11783 (__v8hi)
11784 _mm_setzero_si128 (),
936c0fe4
AI
11785 (__mmask8) __U);
11786}
11787
/* Calls __builtin_ia32_vcvtps2ph256_mask with the 256-bit __A viewed as
   __v8sf and the immediate __I, passing the 128-bit __W (viewed as __v8hi)
   and __U as the builtin's trailing vector and mask arguments.  The result
   is returned as a 128-bit __m128i.  */
6b62f323 11788extern __inline __m128i
936c0fe4 11789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11790_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11791 const int __I)
936c0fe4 11792{
6b62f323
JJ
11793 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11794 (__v8hi) __W,
11795 (__mmask8) __U);
936c0fe4
AI
11796}
11797
6b62f323
JJ
/* Calls __builtin_ia32_vcvtps2ph256_mask with the 256-bit __A viewed as
   __v8sf and the immediate __I, passing an all-zero 128-bit vector from
   _mm_setzero_si128 () (viewed as __v8hi) and the caller's __U as the
   trailing vector and mask arguments.  */
11798extern __inline __m128i
11799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
eee5d6f5 11801{
6b62f323
JJ
11802 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11803 (__v8hi)
11804 _mm_setzero_si128 (),
11805 (__mmask8) __U);
eee5d6f5
AI
11806}
11807
6b62f323
JJ
/* Calls __builtin_ia32_psradi256_mask with __A viewed as __v8si and the
   immediate __imm, passing __W (viewed as __v8si) and __U as the builtin's
   trailing vector and mask arguments.  */
11808extern __inline __m256i
11809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11810_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11811 const int __imm)
936c0fe4 11812{
6b62f323
JJ
11813 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11814 (__v8si) __W,
11815 (__mmask8) __U);
936c0fe4
AI
11816}
11817
6b62f323
JJ
/* Calls __builtin_ia32_psradi256_mask with __A viewed as __v8si and the
   immediate __imm, passing an all-zero vector from _mm256_setzero_si256 ()
   and the caller's __U as the trailing vector and mask arguments.  */
11818extern __inline __m256i
11819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11820_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11821{
6b62f323
JJ
11822 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11823 (__v8si)
11824 _mm256_setzero_si256 (),
11825 (__mmask8) __U);
eee5d6f5
AI
11826}
11827
6b62f323
JJ
/* Calls __builtin_ia32_psradi128_mask with __A viewed as __v4si and the
   immediate __imm, passing __W (viewed as __v4si) and __U as the builtin's
   trailing vector and mask arguments.  */
11828extern __inline __m128i
11829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11830_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11831 const int __imm)
936c0fe4 11832{
6b62f323
JJ
11833 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11834 (__v4si) __W,
11835 (__mmask8) __U);
936c0fe4
AI
11836}
11837
6b62f323
JJ
/* Calls __builtin_ia32_psradi128_mask with __A viewed as __v4si and the
   immediate __imm, passing an all-zero vector from _mm_setzero_si128 ()
   and the caller's __U as the trailing vector and mask arguments.  */
11838extern __inline __m128i
11839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11840_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11841{
6b62f323
JJ
11842 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11843 (__v4si)
11844 _mm_setzero_si128 (),
11845 (__mmask8) __U);
eee5d6f5
AI
11846}
11847
6b62f323
JJ
/* Unmasked form: calls __builtin_ia32_psraqi256_mask with __A viewed as
   __v4di and the immediate __imm, passing an all-zero vector and
   (__mmask8) -1 (all mask bits set) as the trailing vector and mask
   arguments.  */
11848extern __inline __m256i
11849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850_mm256_srai_epi64 (__m256i __A, const int __imm)
936c0fe4 11851{
6b62f323
JJ
11852 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11853 (__v4di)
11854 _mm256_setzero_si256 (),
c42b0bdf 11855 (__mmask8) -1);
936c0fe4
AI
11856}
11857
6b62f323
JJ
/* Calls __builtin_ia32_psraqi256_mask with __A viewed as __v4di and the
   immediate __imm, passing __W (viewed as __v4di) and __U as the builtin's
   trailing vector and mask arguments.  */
11858extern __inline __m256i
11859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11860_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11861 const int __imm)
936c0fe4 11862{
6b62f323
JJ
11863 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11864 (__v4di) __W,
11865 (__mmask8) __U);
936c0fe4
AI
11866}
11867
6b62f323
JJ
11868extern __inline __m256i
11869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11870_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11871{
6b62f323
JJ
11872 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11873 (__v4di)
11874 _mm256_setzero_si256 (),
11875 (__mmask8) __U);
eee5d6f5
AI
11876}
11877
6b62f323
JJ
11878extern __inline __m128i
11879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11880_mm_srai_epi64 (__m128i __A, const int __imm)
936c0fe4 11881{
6b62f323
JJ
11882 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11883 (__v2di)
11884 _mm_setzero_si128 (),
c42b0bdf 11885 (__mmask8) -1);
936c0fe4
AI
11886}
11887
6b62f323
JJ
11888extern __inline __m128i
11889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11890_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11891 const int __imm)
936c0fe4 11892{
6b62f323
JJ
11893 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11894 (__v2di) __W,
11895 (__mmask8) __U);
936c0fe4
AI
11896}
11897
6b62f323
JJ
11898extern __inline __m128i
11899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11900_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11901{
6b62f323
JJ
11902 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11903 (__v2di)
11904 _mm_setzero_si128 (),
11905 (__mmask8) __U);
eee5d6f5
AI
11906}
11907
6b62f323
JJ
11908extern __inline __m128i
11909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11911{
6b62f323
JJ
11912 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11913 (__v4si) __W,
11914 (__mmask8) __U);
936c0fe4
AI
11915}
11916
6b62f323
JJ
11917extern __inline __m128i
11918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11919_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11920{
6b62f323
JJ
11921 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11922 (__v4si)
11923 _mm_setzero_si128 (),
11924 (__mmask8) __U);
eee5d6f5
AI
11925}
11926
6b62f323
JJ
11927extern __inline __m128i
11928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11929_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11930{
6b62f323
JJ
11931 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11932 (__v2di) __W,
11933 (__mmask8) __U);
936c0fe4
AI
11934}
11935
6b62f323
JJ
11936extern __inline __m128i
11937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11939{
6b62f323
JJ
11940 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11941 (__v2di)
11942 _mm_setzero_si128 (),
11943 (__mmask8) __U);
eee5d6f5
AI
11944}
11945
6b62f323
JJ
11946extern __inline __m256i
11947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11949 int __B)
936c0fe4 11950{
6b62f323
JJ
11951 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11952 (__v8si) __W,
11953 (__mmask8) __U);
936c0fe4
AI
11954}
11955
6b62f323
JJ
11956extern __inline __m256i
11957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11959{
11960 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11961 (__v8si)
11962 _mm256_setzero_si256 (),
11963 (__mmask8) __U);
eee5d6f5
AI
11964}
11965
6b62f323
JJ
11966extern __inline __m256i
11967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11969 int __B)
936c0fe4 11970{
6b62f323
JJ
11971 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11972 (__v4di) __W,
11973 (__mmask8) __U);
936c0fe4
AI
11974}
11975
6b62f323
JJ
11976extern __inline __m256i
11977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
eee5d6f5 11979{
6b62f323
JJ
11980 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11981 (__v4di)
11982 _mm256_setzero_si256 (),
11983 (__mmask8) __U);
eee5d6f5
AI
11984}
11985
6b62f323
JJ
11986extern __inline __m256d
11987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11988_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11989 const int __imm)
936c0fe4 11990{
6b62f323
JJ
11991 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11992 (__v4df) __W,
11993 (__mmask8) __U);
936c0fe4
AI
11994}
11995
6b62f323
JJ
11996extern __inline __m256d
11997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11998_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
eee5d6f5 11999{
6b62f323
JJ
12000 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12001 (__v4df)
12002 _mm256_setzero_pd (),
12003 (__mmask8) __U);
eee5d6f5
AI
12004}
12005
6b62f323
JJ
12006extern __inline __m256d
12007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12009 const int __C)
936c0fe4 12010{
6b62f323
JJ
12011 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12012 (__v4df) __W,
12013 (__mmask8) __U);
936c0fe4
AI
12014}
12015
6b62f323
JJ
12016extern __inline __m256d
12017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12018_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
eee5d6f5 12019{
6b62f323
JJ
12020 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12021 (__v4df)
12022 _mm256_setzero_pd (),
12023 (__mmask8) __U);
eee5d6f5
AI
12024}
12025
6b62f323
JJ
12026extern __inline __m128d
12027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12028_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12029 const int __C)
936c0fe4 12030{
6b62f323
JJ
12031 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12032 (__v2df) __W,
12033 (__mmask8) __U);
936c0fe4
AI
12034}
12035
6b62f323
JJ
12036extern __inline __m128d
12037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12038_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
eee5d6f5 12039{
6b62f323
JJ
12040 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12041 (__v2df)
12042 _mm_setzero_pd (),
12043 (__mmask8) __U);
eee5d6f5
AI
12044}
12045
6b62f323
JJ
12046extern __inline __m256
12047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12049 const int __C)
936c0fe4 12050{
6b62f323
JJ
12051 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12052 (__v8sf) __W,
12053 (__mmask8) __U);
936c0fe4
AI
12054}
12055
6b62f323
JJ
12056extern __inline __m256
12057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12058_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
eee5d6f5 12059{
6b62f323
JJ
12060 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12061 (__v8sf)
12062 _mm256_setzero_ps (),
12063 (__mmask8) __U);
eee5d6f5
AI
12064}
12065
6b62f323
JJ
12066extern __inline __m128
12067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12068_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12069 const int __C)
936c0fe4 12070{
6b62f323
JJ
12071 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12072 (__v4sf) __W,
12073 (__mmask8) __U);
936c0fe4
AI
12074}
12075
6b62f323
JJ
12076extern __inline __m128
12077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12078_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
eee5d6f5 12079{
6b62f323
JJ
12080 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12081 (__v4sf)
12082 _mm_setzero_ps (),
12083 (__mmask8) __U);
eee5d6f5
AI
12084}
12085
6b62f323
JJ
12086extern __inline __m256d
12087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12088_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
936c0fe4 12089{
6b62f323
JJ
12090 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12091 (__v4df) __W,
12092 (__mmask8) __U);
936c0fe4
AI
12093}
12094
6b62f323
JJ
12095extern __inline __m256
12096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12097_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
eee5d6f5 12098{
6b62f323
JJ
12099 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12100 (__v8sf) __W,
12101 (__mmask8) __U);
eee5d6f5
AI
12102}
12103
6b62f323
JJ
12104extern __inline __m256i
12105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
936c0fe4 12107{
6b62f323
JJ
12108 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12109 (__v4di) __W,
12110 (__mmask8) __U);
936c0fe4
AI
12111}
12112
6b62f323
JJ
12113extern __inline __m256i
12114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12115_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
eee5d6f5 12116{
6b62f323
JJ
12117 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12118 (__v8si) __W,
12119 (__mmask8) __U);
eee5d6f5
AI
12120}
12121
6b62f323
JJ
12122extern __inline __m128d
12123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
936c0fe4 12125{
6b62f323
JJ
12126 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12127 (__v2df) __W,
12128 (__mmask8) __U);
936c0fe4
AI
12129}
12130
6b62f323
JJ
12131extern __inline __m128
12132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12133_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
eee5d6f5 12134{
6b62f323
JJ
12135 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12136 (__v4sf) __W,
12137 (__mmask8) __U);
eee5d6f5
AI
12138}
12139
6b62f323
JJ
12140extern __inline __m128i
12141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12142_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
936c0fe4 12143{
6b62f323
JJ
12144 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12145 (__v2di) __W,
12146 (__mmask8) __U);
936c0fe4
AI
12147}
12148
6b62f323
JJ
12149extern __inline __m128i
12150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12151_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
eee5d6f5 12152{
6b62f323
JJ
12153 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12154 (__v4si) __W,
12155 (__mmask8) __U);
eee5d6f5
AI
12156}
12157
936c0fe4 12158extern __inline __mmask8
6b62f323
JJ
12159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12160_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12161{
6b62f323
JJ
12162 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12163 (__v4di) __Y, __P,
12164 (__mmask8) -1);
936c0fe4
AI
12165}
12166
eee5d6f5 12167extern __inline __mmask8
6b62f323
JJ
12168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12169_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12170{
6b62f323
JJ
12171 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12172 (__v8si) __Y, __P,
12173 (__mmask8) -1);
eee5d6f5
AI
12174}
12175
936c0fe4 12176extern __inline __mmask8
6b62f323
JJ
12177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12178_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12179{
6b62f323
JJ
12180 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12181 (__v4di) __Y, __P,
c42b0bdf 12182 (__mmask8) -1);
936c0fe4
AI
12183}
12184
eee5d6f5 12185extern __inline __mmask8
6b62f323
JJ
12186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12188{
6b62f323
JJ
12189 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12190 (__v8si) __Y, __P,
12191 (__mmask8) -1);
eee5d6f5
AI
12192}
12193
936c0fe4 12194extern __inline __mmask8
6b62f323
JJ
12195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12196_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
936c0fe4 12197{
6b62f323
JJ
12198 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12199 (__v4df) __Y, __P,
c42b0bdf 12200 (__mmask8) -1);
936c0fe4
AI
12201}
12202
eee5d6f5 12203extern __inline __mmask8
6b62f323
JJ
12204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12205_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
eee5d6f5 12206{
6b62f323
JJ
12207 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12208 (__v8sf) __Y, __P,
12209 (__mmask8) -1);
eee5d6f5
AI
12210}
12211
936c0fe4 12212extern __inline __mmask8
6b62f323
JJ
12213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12214_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12215 const int __P)
936c0fe4 12216{
6b62f323
JJ
12217 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12218 (__v4di) __Y, __P,
12219 (__mmask8) __U);
936c0fe4
AI
12220}
12221
eee5d6f5 12222extern __inline __mmask8
6b62f323
JJ
12223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12224_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12225 const int __P)
eee5d6f5 12226{
6b62f323
JJ
12227 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12228 (__v8si) __Y, __P,
12229 (__mmask8) __U);
eee5d6f5
AI
12230}
12231
936c0fe4 12232extern __inline __mmask8
6b62f323
JJ
12233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12234_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12235 const int __P)
936c0fe4 12236{
6b62f323
JJ
12237 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12238 (__v4di) __Y, __P,
12239 (__mmask8) __U);
936c0fe4
AI
12240}
12241
eee5d6f5 12242extern __inline __mmask8
6b62f323
JJ
12243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12245 const int __P)
eee5d6f5 12246{
6b62f323
JJ
12247 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12248 (__v8si) __Y, __P,
12249 (__mmask8) __U);
eee5d6f5
AI
12250}
12251
936c0fe4 12252extern __inline __mmask8
6b62f323
JJ
12253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12254_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12255 const int __P)
936c0fe4 12256{
6b62f323
JJ
12257 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12258 (__v4df) __Y, __P,
12259 (__mmask8) __U);
936c0fe4
AI
12260}
12261
eee5d6f5 12262extern __inline __mmask8
6b62f323
JJ
12263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12265 const int __P)
eee5d6f5 12266{
6b62f323
JJ
12267 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12268 (__v8sf) __Y, __P,
12269 (__mmask8) __U);
eee5d6f5
AI
12270}
12271
936c0fe4 12272extern __inline __mmask8
6b62f323
JJ
12273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12274_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12275{
6b62f323
JJ
12276 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12277 (__v2di) __Y, __P,
c42b0bdf 12278 (__mmask8) -1);
936c0fe4
AI
12279}
12280
eee5d6f5 12281extern __inline __mmask8
6b62f323
JJ
12282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5
AI
12284{
12285 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
6b62f323
JJ
12286 (__v4si) __Y, __P,
12287 (__mmask8) -1);
eee5d6f5
AI
12288}
12289
936c0fe4 12290extern __inline __mmask8
6b62f323
JJ
12291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12293{
6b62f323
JJ
12294 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12295 (__v2di) __Y, __P,
12296 (__mmask8) -1);
936c0fe4
AI
12297}
12298
eee5d6f5 12299extern __inline __mmask8
6b62f323
JJ
12300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5 12302{
6b62f323
JJ
12303 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12304 (__v4si) __Y, __P,
12305 (__mmask8) -1);
eee5d6f5
AI
12306}
12307
936c0fe4 12308extern __inline __mmask8
6b62f323
JJ
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
936c0fe4 12311{
6b62f323
JJ
12312 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12313 (__v2df) __Y, __P,
12314 (__mmask8) -1);
936c0fe4
AI
12315}
12316
eee5d6f5 12317extern __inline __mmask8
6b62f323
JJ
12318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
eee5d6f5 12320{
6b62f323
JJ
12321 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12322 (__v4sf) __Y, __P,
12323 (__mmask8) -1);
eee5d6f5
AI
12324}
12325
936c0fe4 12326extern __inline __mmask8
6b62f323
JJ
12327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12328_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12329 const int __P)
936c0fe4
AI
12330{
12331 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
6b62f323
JJ
12332 (__v2di) __Y, __P,
12333 (__mmask8) __U);
936c0fe4
AI
12334}
12335
eee5d6f5 12336extern __inline __mmask8
6b62f323
JJ
12337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12338_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12339 const int __P)
eee5d6f5 12340{
6b62f323
JJ
12341 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12342 (__v4si) __Y, __P,
12343 (__mmask8) __U);
eee5d6f5
AI
12344}
12345
936c0fe4 12346extern __inline __mmask8
6b62f323
JJ
12347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12348_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12349 const int __P)
936c0fe4 12350{
6b62f323
JJ
12351 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12352 (__v2di) __Y, __P,
12353 (__mmask8) __U);
936c0fe4
AI
12354}
12355
eee5d6f5 12356extern __inline __mmask8
6b62f323
JJ
12357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12358_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12359 const int __P)
eee5d6f5 12360{
6b62f323
JJ
12361 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12362 (__v4si) __Y, __P,
12363 (__mmask8) __U);
eee5d6f5
AI
12364}
12365
936c0fe4 12366extern __inline __mmask8
6b62f323
JJ
12367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12369 const int __P)
936c0fe4 12370{
6b62f323
JJ
12371 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12372 (__v2df) __Y, __P,
12373 (__mmask8) __U);
936c0fe4
AI
12374}
12375
eee5d6f5 12376extern __inline __mmask8
6b62f323
JJ
12377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12378_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12379 const int __P)
eee5d6f5 12380{
6b62f323
JJ
12381 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12382 (__v4sf) __Y, __P,
12383 (__mmask8) __U);
eee5d6f5
AI
12384}
12385
6b62f323
JJ
12386extern __inline __m256d
12387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388_mm256_permutex_pd (__m256d __X, const int __M)
936c0fe4 12389{
6b62f323
JJ
12390 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12391 (__v4df)
12392 _mm256_undefined_pd (),
12393 (__mmask8) -1);
936c0fe4
AI
12394}
12395
12396#else
/* Macro form: expands to __builtin_ia32_permdf256_mask on X with immediate M,
   _mm256_undefined_pd () as the third operand and mask (__mmask8)-1.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(M), \
     (__v4df)(__m256d) _mm256_undefined_pd (), \
     (__mmask8)-1))
12402
395a191d
SP
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I,
   _mm256_setzero_si256 () as the third operand and mask (__mmask8) -1.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8) -1))
12409
936c0fe4
AI
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I,
   _mm256_setzero_si256 () as the third operand and M as the mask.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8)(M)))
12416
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I, W as the
   third operand and M as the mask.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(M)))
12422
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_ps () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12428
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12434
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_ps () as the fourth operand and U as the mask.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12440
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12446
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12452
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and U as the mask.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12458
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate C,
   _mm_setzero_ps () as the third operand and mask (__mmask8)-1.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))
12464
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate C,
   W as the third operand and U as the mask.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))
12470
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate C,
   _mm_setzero_ps () as the third operand and U as the mask.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))
12476
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate C,
   _mm_setzero_si128 () as the third operand and mask (__mmask8)-1.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))
12480
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate C,
   W as the third operand and U as the mask.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))
12484
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate C,
   _mm_setzero_si128 () as the third operand and U as the mask.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
12488
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12494
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))
12500
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12506
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)-1))
12513
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12519
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_si256 () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)(U)))
12526
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with immediate C,
   _mm256_setzero_pd () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))
12532
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12538
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with immediate C,
   _mm256_setzero_pd () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))
12544
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_ps () as the fourth operand and mask (__mmask8)-1.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12550
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12556
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with immediate C,
   _mm256_setzero_ps () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12562
/* Expands to __builtin_ia32_shufpd256_mask on A and B with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12568
/* Expands to __builtin_ia32_shufpd256_mask on A and B with immediate C,
   _mm256_setzero_pd () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d) _mm256_setzero_pd (), \
     (__mmask8)(U)))
12575
/* Expands to __builtin_ia32_shufpd128_mask on A and B with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))
12581
/* Expands to __builtin_ia32_shufpd128_mask on A and B with immediate C,
   _mm_setzero_pd () as the fourth operand and U as the mask.  */
#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))
12587
/* Expands to __builtin_ia32_shufps256_mask on A and B with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12593
/* Expands to __builtin_ia32_shufps256_mask on A and B with immediate C,
   _mm256_setzero_ps () as the fourth operand and U as the mask.  */
#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12599
/* Expands to __builtin_ia32_shufps128_mask on A and B with immediate C,
   W as the fourth operand and U as the mask.  */
#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))
12605
/* Expands to __builtin_ia32_shufps128_mask on A and B with immediate C,
   _mm_setzero_ps () as the fourth operand and U as the mask.  */
#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))
12611
/* Expands to __builtin_ia32_fixupimmpd256_mask on X, Y and Z with immediate C
   and mask (__mmask8)(-1).  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12617
/* Expands to __builtin_ia32_fixupimmpd256_mask on X, Y and Z with immediate C
   and U as the mask.  */
#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12623
/* Expands to __builtin_ia32_fixupimmpd256_maskz on X, Y and Z with
   immediate C and U as the mask.  */
#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12629
/* Expands to __builtin_ia32_fixupimmps256_mask on X, Y and Z with immediate C
   and mask (__mmask8)(-1).  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12635
12636
/* Expands to __builtin_ia32_fixupimmps256_mask on X, Y and Z with immediate C
   and U as the mask.  */
#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12642
/* Expands to __builtin_ia32_fixupimmps256_maskz on X, Y and Z with
   immediate C and U as the mask.  */
#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12648
/* Expands to __builtin_ia32_fixupimmpd128_mask on X, Y and Z with immediate C
   and mask (__mmask8)(-1).  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12654
12655
/* Expands to __builtin_ia32_fixupimmpd128_mask on X, Y and Z with immediate C
   and U as the mask.  */
#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12661
/* Expands to __builtin_ia32_fixupimmpd128_maskz on X, Y and Z with
   immediate C and U as the mask.  */
#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12667
/* Expands to __builtin_ia32_fixupimmps128_mask on X, Y and Z with immediate C
   and mask (__mmask8)(-1).  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12673
/* Expands to __builtin_ia32_fixupimmps128_mask on X, Y and Z with immediate C
   and U as the mask.  */
#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12679
/* Expands to __builtin_ia32_fixupimmps128_maskz on X, Y and Z with
   immediate C and U as the mask.  */
#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12685
/* Expands to __builtin_ia32_psrldi256_mask on A with immediate B, W as the
   third operand and U as the mask.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12689
/* Expands to __builtin_ia32_psrldi256_mask on A with immediate B,
   _mm256_setzero_si256 () as the third operand and U as the mask.  */
#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
936c0fe4
AI
12693
12694#define _mm_mask_srli_epi32(W, U, A, B) \
12695 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12696 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12697
12698#define _mm_maskz_srli_epi32(U, A, B) \
12699 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
a25a7887 12700 (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
936c0fe4
AI
12701
/* Masked srli_epi64 macros over __builtin_ia32_psrlqi256_mask and
   __builtin_ia32_psrlqi128_mask.  A is the source vector and B the
   count (cast to int).  The _mask forms pass W as the third builtin
   operand; the _maskz forms pass an all-zero vector instead.  U is the
   8-bit mask.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8)(U)))
936c0fe4
AI
12717
/* 256-bit masked slli macros: epi32 uses __builtin_ia32_pslldi256_mask,
   epi64 uses __builtin_ia32_psllqi256_mask.  X is the source vector and
   C the count (cast to int).  The _mask forms pass W as the third
   builtin operand; the _maskz forms pass _mm256_setzero_si256 ().  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
12737
/* 128-bit masked slli macros: epi32 uses __builtin_ia32_pslldi128_mask,
   epi64 uses __builtin_ia32_psllqi128_mask.  X is the source vector and
   C the count (cast to int).  The _mask forms pass W as the third
   builtin operand; the _maskz forms pass _mm_setzero_si128 ().  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
12757
/* 256-bit ternarylogic macros.  A, B and C are the three vector
   operands and I the immediate (cast to int).  The unmasked and _mask
   forms expand to the pternlog{q,d}256_mask builtin (all-ones mask and
   U respectively); the _maskz forms expand to pternlog{q,d}256_maskz.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))
12781
/* 128-bit ternarylogic macros.  A, B and C are the three vector
   operands and I the immediate (cast to int).  The unmasked and _mask
   forms expand to the pternlog{q,d}128_mask builtin (all-ones mask and
   U respectively); the _maskz forms expand to pternlog{q,d}128_maskz.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))
12805
/* 256-bit roundscale macros over __builtin_ia32_rndscaleps_256_mask
   and __builtin_ia32_rndscalepd_256_mask.  A is the source vector and
   B the immediate (cast to int).  The unmasked forms pass a zero vector
   with an all-ones mask, the _mask forms pass W and U, and the _maskz
   forms pass a zero vector and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))
936c0fe4
AI
12829
/* 128-bit roundscale macros over __builtin_ia32_rndscaleps_128_mask
   and __builtin_ia32_rndscalepd_128_mask.  A is the source vector and
   B the immediate (cast to int).  The unmasked forms pass a zero vector
   with an all-ones mask, the _mask forms pass W and U, and the _maskz
   forms pass a zero vector and U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))
936c0fe4
AI
12853
/* Single-precision getmant macros over __builtin_ia32_getmantps256_mask
   and __builtin_ia32_getmantps128_mask.  The two selector arguments are
   packed into one int immediate as ((C) << 2) | (B).  The unmasked
   forms pass a zero vector with an all-ones mask, the _mask forms pass
   W and U, and the _maskz forms pass a zero vector and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))
12889
/* Double-precision getmant macros over __builtin_ia32_getmantpd256_mask
   and __builtin_ia32_getmantpd128_mask.  The two selector arguments are
   packed into one int immediate as ((C) << 2) | (B).  The unmasked
   forms pass a zero vector with an all-ones mask, the _mask forms pass
   W and U, and the _maskz forms pass a zero vector and U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))
12925
/* Masked gathers of float/double elements with 32-bit indices, built on
   the __builtin_ia32_gather3siv* builtins.  V1OLD is forwarded as the
   builtin's first operand, ADDR is cast to void const *, INDEX is the
   index vector, MASK is cast to __mmask8 and SCALE to int.

   Every macro argument is parenthesised before its cast, and the whole
   expansion is parenthesised, so that compound arguments (for example
   ADDR given as `base + off', or MASK given as `m1 & m2') are cast as a
   unit and the result can be used safely inside larger expressions.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v4si)(__m128i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12949
/* Masked gathers of float/double elements with 64-bit indices, built on
   the __builtin_ia32_gather3div* builtins.  Note that the 256-bit-index
   single-precision form takes and returns a 128-bit vector (__m128).
   V1OLD is forwarded as the builtin's first operand, ADDR is cast to
   void const *, MASK to __mmask8 and SCALE to int.

   Every macro argument is parenthesised before its cast, and the whole
   expansion is parenthesised, so that compound arguments are cast as a
   unit and the result can be used safely inside larger expressions.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v4di)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v2di)(__m128i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12973
/* Masked gathers of 32-/64-bit integer elements with 32-bit indices,
   built on the __builtin_ia32_gather3siv* builtins.  V1OLD is forwarded
   as the builtin's first operand, ADDR is cast to void const *, MASK to
   __mmask8 and SCALE to int.

   Every macro argument is parenthesised before its cast, and the whole
   expansion is parenthesised, so that compound arguments are cast as a
   unit and the result can be used safely inside larger expressions.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8si)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4si)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
12997
/* Masked gathers of 32-/64-bit integer elements with 64-bit indices,
   built on the __builtin_ia32_gather3div* builtins.  Note that the
   256-bit-index epi32 form takes and returns a 128-bit vector
   (__m128i).  V1OLD is forwarded as the builtin's first operand, ADDR
   is cast to void const *, MASK to __mmask8 and SCALE to int.

   Every macro argument is parenthesised before its cast, and the whole
   expansion is parenthesised, so that compound arguments are cast as a
   unit and the result can be used safely inside larger expressions.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v4di)(__m256i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v2di)(__m128i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
13021
/* Scatters of single-precision elements with 32-bit indices, built on
   __builtin_ia32_scattersiv8sf / __builtin_ia32_scattersiv4sf.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast: without that,
   an ADDR such as `p + 1' would expand to `(void *)p + 1', i.e. the
   cast would bind to `p' alone and the addition would then be done on
   a void pointer.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))
13041
/* Scatters of double-precision elements with 32-bit indices, built on
   __builtin_ia32_scattersiv4df / __builtin_ia32_scattersiv2df.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))
13061
/* Scatters of single-precision elements with 64-bit indices, built on
   __builtin_ia32_scatterdiv8sf / __builtin_ia32_scatterdiv4sf.  Both
   widths take the data vector V1 as a 128-bit __m128.  The unmasked
   forms pass (__mmask8) 0xFF as the mask; the _mask forms pass MASK.
   ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4sf)(__m128) (V1), (int) (SCALE))
13081
/* Scatters of double-precision elements with 64-bit indices, built on
   __builtin_ia32_scatterdiv4df / __builtin_ia32_scatterdiv2df.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2df)(__m128d) (V1), (int) (SCALE))
13101
/* Scatters of 32-bit integer elements with 32-bit indices, built on
   __builtin_ia32_scattersiv8si / __builtin_ia32_scattersiv4si.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))
13121
/* Scatters of 64-bit integer elements with 32-bit indices, built on
   __builtin_ia32_scattersiv4di / __builtin_ia32_scattersiv2di.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4si)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))
13141
/* Scatters of 32-bit integer elements with 64-bit indices, built on
   __builtin_ia32_scatterdiv8si / __builtin_ia32_scatterdiv4si.  Both
   widths take the data vector V1 as a 128-bit __m128i.  The unmasked
   forms pass (__mmask8) 0xFF as the mask; the _mask forms pass MASK.
   ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v4si)(__m128i) (V1), (int) (SCALE))
13161
/* Scatters of 64-bit integer elements with 64-bit indices, built on
   __builtin_ia32_scatterdiv4di / __builtin_ia32_scatterdiv2di.  The
   unmasked forms pass (__mmask8) 0xFF as the mask; the _mask forms pass
   MASK.  ADDR is cast to void *, SCALE to int.

   Every macro argument is parenthesised before its cast, so compound
   arguments (e.g. ADDR given as `p + 1') are cast as a unit rather
   than having the cast bind to their first operand only.  */
#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di)(__m256i) (INDEX), \
                                (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di)(__m128i) (INDEX), \
                                (__v2di)(__m128i) (V1), (int) (SCALE))
13181
/* Masked shuffle_epi32 macros over __builtin_ia32_pshufd256_mask and
   __builtin_ia32_pshufd128_mask.  X is the source vector and C the
   immediate (cast to int).  The _mask forms pass W as the third builtin
   operand; the _maskz forms pass an all-zero vector.  U is the 8-bit
   mask.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13202
/* rol_epi64 macros over __builtin_ia32_prolq256_mask and
   __builtin_ia32_prolq128_mask.  A is the source vector and B the
   count (cast to int).  The unmasked forms pass a zero vector with an
   all-ones mask, the _mask forms pass W and U, and the _maskz forms
   pass a zero vector and U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13232
/* ror_epi64 macros over __builtin_ia32_prorq256_mask and
   __builtin_ia32_prorq128_mask.  A is the source vector and B the
   count (cast to int).  The unmasked forms pass a zero vector with an
   all-ones mask, the _mask forms pass W and U, and the _maskz forms
   pass a zero vector and U.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13262
/* rol_epi32 macros over __builtin_ia32_prold256_mask and
   __builtin_ia32_prold128_mask.  A is the source vector and B the
   count (cast to int).  The unmasked forms pass a zero vector with an
   all-ones mask, the _mask forms pass W and U, and the _maskz forms
   pass a zero vector and U.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13292
/* ror_epi32 macros over __builtin_ia32_prord256_mask and
   __builtin_ia32_prord128_mask.  A is the source vector and B the
   count (cast to int).  The unmasked forms pass a zero vector with an
   all-ones mask, the _mask forms pass W and U, and the _maskz forms
   pass a zero vector and U.  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13323
/* 256-bit alignr_epi32 macros over __builtin_ia32_alignd256_mask.
   X and Y are the two source vectors and C the immediate (cast to
   int).  The _mask form passes W and U; the _maskz form passes a zero
   vector and U.

   The unmasked form uses an all-ones mask, so the fourth (pass-through)
   operand never contributes to the result.  It is given as a zero
   vector rather than a second copy of X so that the macro expands each
   of its arguments exactly once (an X with side effects is no longer
   evaluated twice).  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
13336
/* 256-bit alignr_epi64 macros over __builtin_ia32_alignq256_mask.
   X and Y are the two source vectors and C the immediate (cast to
   int).  The _mask form passes W and U; the _maskz form passes a zero
   vector and U.

   The unmasked form uses an all-ones mask, so the fourth (pass-through)
   operand never contributes to the result.  It is given as a zero
   vector rather than a second copy of X so that the macro expands each
   of its arguments exactly once (an X with side effects is no longer
   evaluated twice).  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
13349
/* 128-bit alignr_epi32 macros over __builtin_ia32_alignd128_mask.
   X and Y are the two source vectors and C the immediate (cast to
   int).  The _mask form passes W and U; the _maskz form passes a zero
   vector and U.

   The unmasked form uses an all-ones mask, so the fourth (pass-through)
   operand never contributes to the result.  It is given as a zero
   vector rather than a second copy of X so that the macro expands each
   of its arguments exactly once (an X with side effects is no longer
   evaluated twice).  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13362
/* Unmasked 128-bit alignr_epi64 over __builtin_ia32_alignq128_mask.
   X and Y are the two source vectors and C the immediate (cast to
   int).  The mask is all-ones, so the fourth (pass-through) operand
   never contributes to the result; it is given as a zero vector rather
   than a second copy of X so that each macro argument is expanded
   exactly once.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))
13366
/* Merge-masked 128-bit alignr_epi64 over __builtin_ia32_alignq128_mask.
   X and Y are the two source vectors and C the immediate (cast to
   int).  W is forwarded as the pass-through operand and U as the 8-bit
   mask, matching the other _mask_alignr macros in this header.
   (Previously this macro ignored W and U and passed X with an all-ones
   mask, making it behave like the unmasked form.)  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))
13370
/* Zero-masked 128-bit alignr_epi64 over __builtin_ia32_alignq128_mask:
   X, Y and the immediate C (cast to int) are forwarded with a zero
   vector as the fourth operand and U as the 8-bit mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13375
/* Masked cvtps_ph macros over __builtin_ia32_vcvtps2ph_mask (128-bit
   source) and __builtin_ia32_vcvtps2ph256_mask (256-bit source); both
   yield a __m128i.  A is the source vector and I the immediate (cast
   to int).  The _mask forms pass W as the third builtin operand; the
   _maskz forms pass _mm_setzero_si128 ().  U is the 8-bit mask.

   A is parenthesised before its cast so that a compound argument is
   converted as a unit instead of the cast binding to its first operand
   only.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf)(__m128) (A), \
      (int) (I), \
      (__v8hi)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf)(__m128) (A), \
      (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf)(__m256) (A), \
      (int) (I), \
      (__v8hi)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf)(__m256) (A), \
      (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
936c0fe4
AI
13391
/* Masked srai_epi32 macros over __builtin_ia32_psradi256_mask and
   __builtin_ia32_psradi128_mask.  A is the source vector and B the
   count (cast to int).  The _mask forms pass W as the third builtin
   operand; the _maskz forms pass an all-zero vector.  U is the 8-bit
   mask.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)_mm_setzero_si128 (), \
      (__mmask8)(U)))
936c0fe4
AI
13407
/* 256-bit srai_epi64 in unmasked, masked and zero-masked forms.  All
   three expand to __builtin_ia32_psraqi256_mask on A (viewed as __v4di)
   with the immediate count B.  The unmasked form passes a zero vector
   and -1 converted to __mmask8; the mask form passes W and U; the maskz
   form passes a zero vector and U.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
13419
/* 128-bit srai_epi64 in unmasked, masked and zero-masked forms.  All
   three expand to __builtin_ia32_psraqi128_mask on A (viewed as __v2di)
   with the immediate count B.  The unmasked form passes a zero vector
   and -1 converted to __mmask8; the mask form passes W and U; the maskz
   form passes a zero vector and U.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8)(U)))
936c0fe4
AI
13431
/* 256-bit permutex_pd, masked and zero-masked.  Both expand to
   __builtin_ia32_permdf256_mask on A (viewed as __v4df) with the
   immediate B and the mask U; the third operand is W for the mask form
   and _mm256_setzero_pd () for the maskz form.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))
936c0fe4
AI
13439
/* 256-bit permute_pd, masked and zero-masked.  Both expand to
   __builtin_ia32_vpermilpd256_mask on X (viewed as __v4df) with the
   immediate C and the mask U; the third operand is W for the mask form
   and _mm256_setzero_pd () for the maskz form.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))
13449
/* 256-bit permute_ps, masked and zero-masked.  Both expand to
   __builtin_ia32_vpermilps256_mask on X (viewed as __v8sf) with the
   immediate C and the mask U; the third operand is W for the mask form
   and _mm256_setzero_ps () for the maskz form.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(X), \
      (int)(C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(X), \
      (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))
13458
/* 128-bit permute_pd, masked and zero-masked.  Both expand to
   __builtin_ia32_vpermilpd_mask on X (viewed as __v2df) with the
   immediate C and the mask U; the third operand is W for the mask form
   and _mm_setzero_pd () for the maskz form.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(X), \
      (int)(C), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(X), \
      (int)(C), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))
13467
/* 128-bit permute_ps, masked and zero-masked.  Both expand to
   __builtin_ia32_vpermilps_mask on X (viewed as __v4sf) with the
   immediate C and the mask U; the third operand is W for the mask form
   and _mm_setzero_ps () for the maskz form.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(X), \
      (int)(C), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(X), \
      (int)(C), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))
13476
/* 256-bit mask_blend macros.  Each forwards __A, __W and the mask __U,
   in that order, to the matching __builtin_ia32_blendm*_256_mask
   builtin, casting the vectors to the element type named by the macro
   suffix (pd: __v4df, ps: __v8sf, epi64: __v4di, epi32: __v8si).  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
      (__v4df) (__A), (__v4df) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
      (__v8sf) (__A), (__v8sf) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
      (__v4di) (__A), (__v4di) (__W), (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
      (__v8si) (__A), (__v8si) (__W), (__mmask8) (__U)))
13496
/* 128-bit mask_blend macros.  Each forwards __A, __W and the mask __U,
   in that order, to the matching __builtin_ia32_blendm*_128_mask
   builtin, casting the vectors to the element type named by the macro
   suffix (pd: __v2df, ps: __v4sf, epi64: __v2di, epi32: __v4si).  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
      (__v2df) (__A), (__v2df) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
      (__v4sf) (__A), (__v4sf) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
      (__v2di) (__A), (__v2di) (__W), (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
      (__v4si) (__A), (__v4si) (__W), (__mmask8) (__U)))
13516
/* 256-bit unmasked compare macros.  Each forwards X, Y and the
   immediate predicate P to the matching 256-bit compare builtin and
   passes -1 converted to __mmask8 as the incoming mask; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)-1))
13546
/* 256-bit masked compare macros.  Each forwards X, Y and the immediate
   predicate P to the matching 256-bit compare builtin and passes M as
   the incoming mask; the result is cast to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)(M)))
13576
/* 128-bit unmasked compare macros.  Each forwards X, Y and the
   immediate predicate P to the matching 128-bit compare builtin and
   passes -1 converted to __mmask8 as the incoming mask; the result is
   cast to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1))
13606
/* 128-bit masked compare macros.  Each forwards X, Y and the immediate
   predicate P to the matching 128-bit compare builtin and passes M as
   the incoming mask; the result is cast to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M)))
13636
13637#endif
13638
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   A is passed second and B first.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
936c0fe4
AI
13640
/* Undo the target override made at the top of this header: when
   __AVX512VL__ was not predefined, the header pushed the option state
   and defined __DISABLE_AVX512VL__, so drop the marker and pop the
   saved options here.  */
#if defined (__DISABLE_AVX512VL__)
# undef __DISABLE_AVX512VL__
# pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */
13645
13646#endif /* _AVX512VLINTRIN_H_INCLUDED */